From fa7ee4f89fa92a50a9891fe74834d0fb6345d31a Mon Sep 17 00:00:00 2001
From: Michael Dumelle
Date: Thu, 20 Nov 2025 12:45:54 -0800
Subject: [PATCH 1/9] version update
---
DESCRIPTION | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/DESCRIPTION b/DESCRIPTION
index 8f203cb..18f3533 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: spmodel
Title: Spatial Statistical Modeling and Prediction
-Version: 0.11.1
+Version: 0.12.0
Authors@R: c(
person(given = "Michael",
family = "Dumelle",
From 09abe3e09201de98374936c7c267ea5727523dce Mon Sep 17 00:00:00 2001
From: Michael Dumelle
Date: Thu, 20 Nov 2025 12:48:32 -0800
Subject: [PATCH 2/9] some bk improvements
---
NEWS.md | 8 ++++++++
R/get_local_list.R | 15 ++++++++++++---
R/predict.R | 2 +-
R/predict_block.R | 26 +++++++++++++++++++++++++-
man/predict.spmodel.Rd | 2 +-
5 files changed, 47 insertions(+), 6 deletions(-)
diff --git a/NEWS.md b/NEWS.md
index 02cfd15..5ca8933 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,11 @@
+# spmodel 0.12.0
+
+## Minor Updates
+
+* Changed the default `size` argument to the local argument in `predict(..., block = TRUE)` and `augment(..., block = TRUE)` from 1000 to 4000. This enhances the block prediction (i.e., Kriging) approximation's accuracy but can slightly increase computational complexity.
+* Enhanced efficiency of block prediction (i.e., Kriging) for large `newdata` objects.
+* Minor documentation updates.
+
# spmodel 0.11.1
## Minor Updates
diff --git a/R/get_local_list.R b/R/get_local_list.R
index 052e83b..fbd8578 100644
--- a/R/get_local_list.R
+++ b/R/get_local_list.R
@@ -168,7 +168,7 @@ get_local_list_prediction_block <- function(local) {
if (is.logical(local)) {
if (local) {
- local <- list(method = "covariance", size = 1000)
+ local <- list(method = "covariance", size = 4000)
} else {
local <- list(method = "all")
}
@@ -190,10 +190,19 @@ get_local_list_prediction_block <- function(local) {
}
if (local$method %in% c("distance", "covariance") && !"size" %in% names_local) {
- local$size <- 1000
+ local$size <- 4000
}
- # no parallel required
+ if (!"parallel" %in% names_local) {
+ local$parallel <- FALSE
+ local$ncores <- NULL
+ }
+
+ if (local$parallel) {
+ if (!"ncores" %in% names_local) {
+ local$ncores <- parallel::detectCores()
+ }
+ }
local
}
diff --git a/R/predict.R b/R/predict.R
index d011a0c..fd1170b 100644
--- a/R/predict.R
+++ b/R/predict.R
@@ -73,7 +73,7 @@
#' If \code{block} is \code{TRUE}, \code{local} accepts \code{method} and \code{size},
#' and \code{method} takes values of \code{"all"}, \code{"covariance"},
#' and \code{"distance"}, similar as when \code{block} is \code{FALSE}.
-#' The default \code{method} is \code{"distance"} with size \code{1000}. This default \code{size} is
+#' The default \code{method} is \code{"covariance"} with size \code{4000}. This default \code{size} is
#' much larger than when \code{block} is \code{FALSE}. This is because when \code{block} is \code{TRUE},
#' covariances and explanatory variables are averaged before prediction, which greatly
#' reduces computational burden, only requiring the Cholesky decomposition
diff --git a/R/predict_block.R b/R/predict_block.R
index bf458f4..24c87cf 100644
--- a/R/predict_block.R
+++ b/R/predict_block.R
@@ -143,7 +143,11 @@ predict_block_splm <- function(object, newdata, se.fit, scale, df, interval, lev
c0 <- colMeans(covmatrix(object, newdata = newdata, cov_type = "pred.obs"))
Sig <- covmatrix(object)
- s0 <- mean(covmatrix(object, newdata = newdata, cov_type = "pred.pred"))
+ if (NROW(newdata) > 1e4) {
+ s0 <- get_bk_var(object, newdata, local)
+ } else {
+ s0 <- mean(covmatrix(object, newdata = newdata, cov_type = "pred.pred"))
+ }
Xmat <- model.matrix(object)
if (local$method == "all") {
@@ -234,3 +238,23 @@ predict_block_splm <- function(object, newdata, se.fit, scale, df, interval, lev
stop("Interval must be none, confidence, or prediction")
}
}
+
+get_bk_var <- function(object, newdata, local) {
+ index <- seq(1, NROW(newdata))
+ object$obdata <- newdata
+ if (local$parallel) {
+ cl <- parallel::makeCluster(local$ncores)
+ val <- parallel::parLapply(cl, index, get_each_bk_meancov, object, newdata)
+ cl <- parallel::stopCluster(cl)
+ } else {
+ val <- lapply(index, get_each_bk_meancov, object, newdata)
+ }
+ mean(unlist(val))
+}
+
+get_each_bk_meancov <- function(index, object, newdata) {
+ newdata <- newdata[index, , drop = FALSE]
+ val <- as.vector(spmodel::covmatrix(object, newdata = newdata, cov_type = "obs.pred"))
+ val[index] <- val[index] + object$coefficients$spcov[["ie"]] # this is to add ie variance to diagonal (which is omitted with newdata)
+ mean(val)
+}
diff --git a/man/predict.spmodel.Rd b/man/predict.spmodel.Rd
index 72a4378..30a9d13 100644
--- a/man/predict.spmodel.Rd
+++ b/man/predict.spmodel.Rd
@@ -235,7 +235,7 @@ that \code{local} is transformed into
If \code{block} is \code{TRUE}, \code{local} accepts \code{method} and \code{size},
and \code{method} takes values of \code{"all"}, \code{"covariance"},
and \code{"distance"}, similar as when \code{block} is \code{FALSE}.
-The default \code{method} is \code{"distance"} with size \code{1000}. This default \code{size} is
+The default \code{method} is \code{"covariance"} with size \code{4000}. This default \code{size} is
much larger than when \code{block} is \code{FALSE}. This is because when \code{block} is \code{TRUE},
covariances and explanatory variables are averaged before prediction, which greatly
reduces computational burden, only requiring the Cholesky decomposition
From 5f130f1cfb00b6d1ad380ab845cf2e55e26979a8 Mon Sep 17 00:00:00 2001
From: Michael Dumelle
Date: Fri, 21 Nov 2025 11:11:54 -0800
Subject: [PATCH 3/9] remove unncessary formula argument in get_esv_cloud()
---
R/esv.R | 2 +-
R/get_esv.R | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/R/esv.R b/R/esv.R
index b392dd9..71a0306 100644
--- a/R/esv.R
+++ b/R/esv.R
@@ -170,7 +170,7 @@ esv <- function(formula, data, xcoord, ycoord, cloud = FALSE, robust = FALSE, bi
residual_vector2 <- residual_vector^2
if (cloud) {
- esv_out <- get_esv_cloud(residual_vector2, dist_vector, formula)
+ esv_out <- get_esv_cloud(residual_vector2, dist_vector)
} else {
if (robust) {
residual_vector12 <- sqrt(residual_vector)
diff --git a/R/get_esv.R b/R/get_esv.R
index 6ad4f78..0933fd7 100644
--- a/R/get_esv.R
+++ b/R/get_esv.R
@@ -62,7 +62,7 @@ get_esv_robust <- function(residual_vector12, dist_vector, bins, cutoff, formula
esv_out
}
-get_esv_cloud <- function(residual_vector2, dist_vector, formula) {
+get_esv_cloud <- function(residual_vector2, dist_vector) {
esv_out <- tibble::tibble(dist = dist_vector, gamma = residual_vector2 / 2)
From da80d71a920d86e5478dc4b9bcf445d48ad8b2a6 Mon Sep 17 00:00:00 2001
From: Michael Dumelle
Date: Fri, 21 Nov 2025 11:40:50 -0800
Subject: [PATCH 4/9] add eacf
---
NAMESPACE | 2 +
NEWS.md | 4 +
R/eacf.R | 204 +++++++++++++++++++++++++++++++++++
R/get_eacf.R | 41 +++++++
R/get_eacf_dotlist.R | 68 ++++++++++++
man/eacf.Rd | 89 +++++++++++++++
tests/testthat/test-extras.R | 92 ++++++++++++++++
tests/testthat/test-spglm.R | 9 ++
tests/testthat/test-splm.R | 9 ++
9 files changed, 518 insertions(+)
create mode 100644 R/eacf.R
create mode 100644 R/get_eacf.R
create mode 100644 R/get_eacf_dotlist.R
create mode 100644 man/eacf.Rd
diff --git a/NAMESPACE b/NAMESPACE
index be4ce1d..f504a1a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -188,6 +188,7 @@ S3method(model.matrix,spautor)
S3method(model.matrix,spgautor)
S3method(model.matrix,spglm)
S3method(model.matrix,splm)
+S3method(plot,eacf)
S3method(plot,esv)
S3method(plot,spautor)
S3method(plot,spgautor)
@@ -359,6 +360,7 @@ export(augment)
export(covmatrix)
export(dispersion_initial)
export(dispersion_params)
+export(eacf)
export(esv)
export(glance)
export(glances)
diff --git a/NEWS.md b/NEWS.md
index 5ca8933..0ff0240 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,9 @@
# spmodel 0.12.0
+## Major Updates
+
+* Added `eacf()` to compute the empirical autocovariance function.
+
## Minor Updates
* Changed the default `size` argument to the local argument in `predict(..., block = TRUE)` and `augment(..., block = TRUE)` from 1000 to 4000. This enhances the block prediction (i.e., Kriging) approximation's accuracy but can slightly increase computational complexity.
diff --git a/R/eacf.R b/R/eacf.R
new file mode 100644
index 0000000..63c5ae6
--- /dev/null
+++ b/R/eacf.R
@@ -0,0 +1,204 @@
+#' Compute the empirical autocovariance
+#'
+#' @description Compute the empirical autocovariance (i.e., empirical covariance)
+#' for varying bin sizes and cutoff values.
+#'
+#' @param formula A formula describing the fixed effect structure.
+#' @param data A data frame or \code{sf} object containing the variables in \code{formula}
+#' and geographic information.
+#' @param xcoord Name of the variable in \code{data} representing the x-coordinate.
+#' Can be quoted or unquoted. Not required if \code{data} is an \code{sf} object.
+#' @param ycoord Name of the variable in \code{data} representing the y-coordinate.
+#' Can be quoted or unquoted. Not required if \code{data} is an \code{sf} object.
+#' @param cloud A logical indicating whether the empirical autocovariance should
+#' be summarized by distance class or not. When \code{cloud = FALSE} (the default), pairwise autocovariances
+#' are binned and averaged within distance classes. When \code{cloud} = TRUE,
+#' all pairwise autocovariances and distances are returned (this is known as
+#' the "cloud" autocovariance).
+#' @param bins The number of equally spaced bins. The default is 15. Ignored if
+#' \code{cloud = TRUE}.
+#' @param cutoff The maximum distance considered.
+#' The default is half the diagonal of the bounding box from the coordinates.
+#' @param dist_matrix A distance matrix to be used instead of providing coordinate names.
+#' @param partition_factor An optional formula specifying the partition factor.
+#' If specified, autocovariances are only computed for observations sharing the
+#' same level of the partition factor.
+#'
+#' @details The empirical autocovariance (i.e., empirical covariance) is a tool used to visualize and model
+#' spatial dependence by estimating the semivariance of a process at varying distances.
+#' For a constant-mean process, the
+#' autocovariance at distance \eqn{h} is denoted \eqn{Cov(h)} and defined as
+#' \eqn{Cov(z1, z2)}. Under second-order stationarity,
+#' \eqn{Cov(h) = Cov(0) - \gamma(h)}, where \eqn{gamma(h)} is the semivariance function at distance \code{h}. Typically the residuals from an ordinary
+#' least squares fit defined by \code{formula} are second-order stationary with
+#' mean zero. These residuals are used to compute the empirical autocovariance
+#' At a distance \code{h}, the empirical autocovariance is
+#' \eqn{1/N(h) \sum (r1 \times r2)}, where \eqn{N(h)} is the number of (unique)
+#' pairs in the set of observations whose distance separation is \code{h} and
+#' \code{r1} and \code{r2} are residuals corresponding to observations whose
+#' distance separation is \code{h}. In spmodel, these distance bins actually
+#' contain observations whose distance separation is \code{h +- c},
+#' where \code{c} is a constant determined implicitly by \code{bins}. Typically,
+#' only observations whose distance separation is below some cutoff are used
+#' to compute the empirical semivariogram (this cutoff is determined by \code{cutoff}).
+#'
+#' @name eacf
+#'
+#' @return If \code{cloud = FALSE}, a tibble (data.frame) with distance bins
+#' (\code{bins}), the average distance (\code{dist}), the average autocovariance (\code{acov}), and the
+#' number of (unique) pairs (\code{np}). If \code{cloud = TRUE}, a tibble
+#' (data.frame) with distance (\code{dist}) and autocovariance (\code{acov})
+#' for each unique pair.
+#'
+#' @export
+#'
+#' @examples
+#' eacf(sulfate ~ 1, sulfate)
+#' plot(eacf(sulfate ~ 1, sulfate))
+eacf <- function(formula, data, xcoord, ycoord, cloud = FALSE, bins = 15, cutoff, dist_matrix, partition_factor) {
+
+ # filter out missing response values
+ na_index <- is.na(data[[all.vars(formula)[1]]])
+ data <- data[!na_index, , drop = FALSE]
+ # finding model frame
+ data_model_frame <- model.frame(formula, data, drop.unused.levels = TRUE, na.action = na.pass)
+ # model matrix with potential NA
+ ob_predictors <- complete.cases(model.matrix(formula, data_model_frame))
+ if (any(!ob_predictors)) {
+ stop("Cannot have NA values in predictors.", call. = FALSE)
+ }
+
+ # covert sp to sf
+ attr_sp <- attr(class(data), "package")
+ if (!is.null(attr_sp) && length(attr_sp) == 1 && attr_sp == "sp") {
+ stop("sf objects must be used instead of sp objects. To convert your sp object into an sf object, run sf::st_as_sf().", call. = FALSE)
+ }
+
+ ## convert sf to data frame (point geometry) (1d objects obsolete)
+ ### see if data has sf class
+ if (inherits(data, "sf")) {
+ data <- suppressWarnings(sf::st_centroid(data))
+ data <- sf_to_df(data)
+ ### name xcoord ".xcoord" to be used later
+ xcoord <- ".xcoord"
+ ### name ycoord ".ycoord" to be used later
+ ycoord <- ".ycoord"
+ }
+
+ # compute spatial distances
+ if (missing(dist_matrix)) {
+ # non standard evaluation for the x and y coordinates
+ xcoord <- substitute(xcoord)
+ ycoord <- substitute(ycoord)
+
+ if (missing(xcoord)) {
+ stop("The xcoord argument must be specified.", call. = FALSE)
+ }
+
+ if (!missing(xcoord)) {
+ if (!as.character(xcoord) %in% colnames(data)) {
+ stop("The xcoord argument must match the name of a variable in data.", call. = FALSE)
+ }
+ }
+
+ if (missing(ycoord)) {
+ dist_matrix <- spdist(data, xcoord)
+ } else {
+ if (!as.character(ycoord) %in% colnames(data)) {
+ stop("The ycoord argument must match the name of a variable in data.", call. = FALSE)
+ }
+ dist_matrix <- spdist(data, xcoord, ycoord)
+ }
+ }
+
+
+ dist_matrix <- as.matrix(dist_matrix)
+ dist_matrix <- dist_matrix[upper.tri(dist_matrix)]
+
+ if (any(dist_matrix == 0)) {
+ warning("Zero distances observed between at least one pair. Ignoring pairs. If using splm(), consider a different estimation method.", call. = FALSE)
+ }
+
+ if (missing(partition_factor)) {
+ partition_factor <- NULL
+ }
+
+ if (!is.null(partition_factor)) {
+ # partition_matrix_val <- triu(partition_matrix(partition_factor, data = data), k = 1)
+ partition_matrix_val <- as.matrix(partition_matrix(partition_factor, data = data))
+ partition_matrix_val <- partition_matrix_val[upper.tri(partition_matrix_val)]
+ dist_matrix <- dist_matrix * partition_matrix_val
+ }
+
+ if (missing(cutoff)) {
+ cutoff <- NULL
+ }
+ if (is.null(cutoff)) {
+ cutoff <- max(dist_matrix) / 2
+ }
+
+ dist_vector <- dist_matrix
+ if (any(dist_vector == 0)) {
+ dist_index <- dist_vector > 0 & dist_vector <= cutoff
+ } else {
+ dist_index <- dist_vector <= cutoff
+ }
+
+ dist_vector <- dist_vector[dist_index]
+
+ # compute squared differences in the residuals
+ lmod <- lm(formula = formula, data = data)
+ residuals <- residuals(lmod)
+ residual_matrix <- outer(residuals, residuals) # ybar is the fitted mean
+ residual_matrix <- residual_matrix[upper.tri(residual_matrix)]
+ if (!is.null(partition_factor)) {
+ residual_matrix <- residual_matrix * partition_matrix_val
+ }
+
+ residual_vector <- residual_matrix
+ residual_vector <- residual_vector[dist_index]
+ residual_vector2 <- residual_vector # just to align with esv() names
+
+
+ if (cloud) {
+ eacf_out <- get_eacf_cloud(residual_vector2, dist_vector)
+ } else {
+ eacf_out <- get_eacf(residual_vector2, dist_vector, bins, cutoff)
+ }
+
+
+
+
+ # remove NA
+ # eacf_out <- na.omit(eacf_out)
+ eacf_out <- structure(eacf_out, class = c("eacf", class(eacf_out)), call = match.call(), cloud = cloud)
+ eacf_out
+}
+
+#' @rdname eacf
+#' @method plot eacf
+#' @param x An object from \code{eacf()}.
+#' @param ... Other arguments passed to other methods.
+#' @export
+plot.eacf <- function(x, ...) {
+
+ cal <- attr(x, "call")
+ if (!is.na(m.f <- match("formula", names(cal)))) {
+ cal <- cal[c(1, m.f)]
+ names(cal)[2L] <- ""
+ }
+ cc <- deparse(cal, 80)
+ nc <- nchar(cc[1L], "c")
+ abbr <- length(cc) > 1 || nc > 75
+ sub.caption <- if (abbr) {
+ paste(substr(cc[1L], 1L, min(75L, nc)), "...")
+ } else {
+ cc[1L]
+ }
+
+ dotlist <- list(...)
+ dotlist <- get_eacf_dotlist_defaults(x, dotlist, cloud = attr(x, "cloud"))
+
+ do.call("plot", c(list(x = x$dist, y = x$acov), dotlist))
+ title(sub = sub.caption)
+}
diff --git a/R/get_eacf.R b/R/get_eacf.R
new file mode 100644
index 0000000..e10bff8
--- /dev/null
+++ b/R/get_eacf.R
@@ -0,0 +1,41 @@
+get_eacf <- function(residual_vector2, dist_vector, bins, cutoff, formula) {
+
+ # compute semivariogram classes
+ dist_classes <- cut(dist_vector, breaks = seq(0, cutoff, length.out = bins + 1))
+
+ # compute squared differences within each class
+ acov <- tapply(residual_vector2, dist_classes, function(x) mean(x))
+
+ # compute pairs within each class
+ np <- tapply(residual_vector2, dist_classes, length)
+
+ # set as zero if necessary
+ np <- ifelse(is.na(np), 0, np)
+
+ # compute average distance within each class
+ dist <- tapply(dist_vector, dist_classes, mean)
+
+ # return output
+ eacf_out <- tibble::tibble(
+ bins = factor(levels(dist_classes), levels = levels(dist_classes)),
+ dist = as.numeric(dist),
+ acov = as.numeric(acov),
+ np = as.numeric(np)
+ )
+
+ # set row names to NULL
+ # row.names(eacf_out) <- NULL
+
+ eacf_out
+}
+
+
+get_eacf_cloud <- function(residual_vector2, dist_vector) {
+
+ eacf_out <- tibble::tibble(dist = dist_vector, acov = residual_vector2)
+
+ # set row names to NULL
+ # row.names(eacf_out) <- NULL
+
+ eacf_out
+}
diff --git a/R/get_eacf_dotlist.R b/R/get_eacf_dotlist.R
new file mode 100644
index 0000000..6e051a9
--- /dev/null
+++ b/R/get_eacf_dotlist.R
@@ -0,0 +1,68 @@
+#' Get empirical autocovariance dotlist
+#'
+#' @param ... Additional arguments to \code{eacf()}
+#' @param max_halfdist
+#'
+#' @return An eacf dotlist
+#'
+#' @noRd
+get_eacf_dotlist <- function(..., max_halfdist) {
+ # storing dotlist and setting defaults for eacf
+ dotlist <- list(...)
+
+ if (!("bins" %in% names(dotlist))) {
+ dotlist$bins <- 15
+ }
+
+ if (!("cutoff" %in% names(dotlist))) {
+ dotlist$cutoff <- max_halfdist
+ }
+
+ # make dotlist eacf
+ dotlist_eacf <- list(bins = dotlist$bins, cutoff = dotlist$cutoff)
+}
+
+
+get_eacf_dotlist_defaults <- function(x, dotlist, cloud) {
+
+ names_dotlist <- names(dotlist)
+
+ # set defaults
+ if (!"main" %in% names_dotlist) {
+ dotlist$main <- "Empirical Autocovariance"
+ if (cloud) dotlist$main <- paste0(dotlist$main, " (Cloud)")
+ }
+
+ if (!"xlab" %in% names_dotlist) {
+ dotlist$xlab <- "Distance"
+ }
+
+ if (!"ylab" %in% names_dotlist) {
+ dotlist$ylab <- "Autocovariance"
+ }
+
+ if (!cloud && !"pch" %in% names_dotlist) {
+ dotlist$pch <- 19
+ }
+
+ if (!cloud && !"cex" %in% names_dotlist) {
+ dotlist$cex <- (x$np - min(x$np)) / (max(x$np) - min(x$np)) * 2 + 1
+ }
+
+ if (!"ylim" %in% names_dotlist) {
+
+ # include zero if not in limits
+
+ ## all greater than zero (positive)
+ if (all(x$acov > 0)) {
+ dotlist$ylim <- c(0, 1.1 * max(x$acov))
+ }
+
+ ## all less than zero (negative)
+ if (all(x$acov < 0)) {
+ dotlist$ylim <- c(1.1 * min(x$acov), 0)
+ }
+ }
+
+ dotlist
+}
diff --git a/man/eacf.Rd b/man/eacf.Rd
new file mode 100644
index 0000000..c9cefe0
--- /dev/null
+++ b/man/eacf.Rd
@@ -0,0 +1,89 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/eacf.R
+\name{eacf}
+\alias{eacf}
+\alias{plot.eacf}
+\title{Compute the empirical autocovariance}
+\usage{
+eacf(
+ formula,
+ data,
+ xcoord,
+ ycoord,
+ cloud = FALSE,
+ bins = 15,
+ cutoff,
+ dist_matrix,
+ partition_factor
+)
+
+\method{plot}{eacf}(x, ...)
+}
+\arguments{
+\item{formula}{A formula describing the fixed effect structure.}
+
+\item{data}{A data frame or \code{sf} object containing the variables in \code{formula}
+and geographic information.}
+
+\item{xcoord}{Name of the variable in \code{data} representing the x-coordinate.
+Can be quoted or unquoted. Not required if \code{data} is an \code{sf} object.}
+
+\item{ycoord}{Name of the variable in \code{data} representing the y-coordinate.
+Can be quoted or unquoted. Not required if \code{data} is an \code{sf} object.}
+
+\item{cloud}{A logical indicating whether the empirical autocovariance should
+be summarized by distance class or not. When \code{cloud = FALSE} (the default), pairwise autocovariances
+are binned and averaged within distance classes. When \code{cloud} = TRUE,
+all pairwise autocovariances and distances are returned (this is known as
+the "cloud" autocovariance).}
+
+\item{bins}{The number of equally spaced bins. The default is 15. Ignored if
+\code{cloud = TRUE}.}
+
+\item{cutoff}{The maximum distance considered.
+The default is half the diagonal of the bounding box from the coordinates.}
+
+\item{dist_matrix}{A distance matrix to be used instead of providing coordinate names.}
+
+\item{partition_factor}{An optional formula specifying the partition factor.
+If specified, autocovariances are only computed for observations sharing the
+same level of the partition factor.}
+
+\item{x}{An object from \code{eacf()}.}
+
+\item{...}{Other arguments passed to other methods.}
+}
+\value{
+If \code{cloud = FALSE}, a tibble (data.frame) with distance bins
+(\code{bins}), the average distance (\code{dist}), the average autocovariance (\code{acov}), and the
+number of (unique) pairs (\code{np}). If \code{cloud = TRUE}, a tibble
+(data.frame) with distance (\code{dist}) and autocovariance (\code{acov})
+for each unique pair.
+}
+\description{
+Compute the empirical autocovariance (i.e., empirical covariance)
+for varying bin sizes and cutoff values.
+}
+\details{
+The empirical autocovariance (i.e., empirical covariance) is a tool used to visualize and model
+spatial dependence by estimating the semivariance of a process at varying distances.
+For a constant-mean process, the
+autocovariance at distance \eqn{h} is denoted \eqn{Cov(h)} and defined as
+\eqn{Cov(z1, z2)}. Under second-order stationarity,
+\eqn{Cov(h) = Cov(0) - \gamma(h)}, where \eqn{gamma(h)} is the semivariance function at distance \code{h}. Typically the residuals from an ordinary
+least squares fit defined by \code{formula} are second-order stationary with
+mean zero. These residuals are used to compute the empirical autocovariance
+At a distance \code{h}, the empirical autocovariance is
+\eqn{1/N(h) \sum (r1 \times r2)}, where \eqn{N(h)} is the number of (unique)
+pairs in the set of observations whose distance separation is \code{h} and
+\code{r1} and \code{r2} are residuals corresponding to observations whose
+distance separation is \code{h}. In spmodel, these distance bins actually
+contain observations whose distance separation is \code{h +- c},
+where \code{c} is a constant determined implicitly by \code{bins}. Typically,
+only observations whose distance separation is below some cutoff are used
+to compute the empirical semivariogram (this cutoff is determined by \code{cutoff}).
+}
+\examples{
+eacf(sulfate ~ 1, sulfate)
+plot(eacf(sulfate ~ 1, sulfate))
+}
diff --git a/tests/testthat/test-extras.R b/tests/testthat/test-extras.R
index 9107633..537bea8 100644
--- a/tests/testthat/test-extras.R
+++ b/tests/testthat/test-extras.R
@@ -445,6 +445,98 @@ if (test_local) {
expect_error(deviance(spmod))
})
+ ##############################################################################
+ ############################ eacf (test-acf.R)
+ ##############################################################################
+
+ test_that("eacf works", {
+
+ # regular implementation
+ eacf1 <- eacf(y ~ x, exdata, xcoord, ycoord)
+ expect_s3_class(eacf1, "eacf")
+ expect_s3_class(eacf1, "tbl_df")
+ expect_s3_class(eacf1, "tbl")
+ expect_s3_class(eacf1, "data.frame")
+ expect_equal(NROW(eacf1), 15)
+ expect_equal(NCOL(eacf1), 4)
+
+ eacf1_q <- eacf(y ~ x, exdata, "xcoord", "ycoord")
+ expect_s3_class(eacf1_q, "eacf")
+ expect_s3_class(eacf1_q, "tbl_df")
+ expect_s3_class(eacf1_q, "tbl")
+ expect_s3_class(eacf1_q, "data.frame")
+ expect_equal(NROW(eacf1_q), 15)
+ expect_equal(NCOL(eacf1_q), 4)
+
+
+ # quoting works (need to NULL out call attribute as that is different)
+ eacf1_cn <- eacf1
+ attr(eacf1_cn, "call") <- NULL
+ eacf1_q_cn <- eacf1_q
+ attr(eacf1_q_cn, "call") <- NULL
+ expect_equal(eacf1_cn, eacf1_q_cn)
+
+ # specifying bins and cutoff
+ eacf2 <- eacf(y ~ x, exdata, xcoord, ycoord, bins = 30, cutoff = 5)
+ expect_s3_class(eacf2, "eacf")
+ expect_s3_class(eacf2, "tbl_df")
+ expect_s3_class(eacf2, "tbl")
+ expect_s3_class(eacf2, "data.frame")
+ expect_equal(NROW(eacf2), 30)
+ expect_equal(NCOL(eacf2), 4)
+ dist_matrix <- spdist(exdata, "xcoord", "ycoord")
+
+ # specifying distance matrix
+ eacf3 <- eacf(y ~ x, exdata, dist_matrix = dist_matrix)
+ expect_s3_class(eacf3, "eacf")
+ expect_s3_class(eacf3, "tbl_df")
+ expect_s3_class(eacf3, "tbl")
+ expect_s3_class(eacf3, "data.frame")
+ expect_equal(NROW(eacf3), 15)
+ expect_equal(NCOL(eacf3), 4)
+
+ # specifying partition factor
+ eacf4 <- eacf(y ~ x, exdata, xcoord, ycoord, partition_factor = ~group)
+ expect_s3_class(eacf4, "eacf")
+ expect_s3_class(eacf4, "tbl_df")
+ expect_s3_class(eacf4, "tbl")
+ expect_s3_class(eacf4, "data.frame")
+ expect_equal(NROW(eacf4), 15)
+ expect_equal(NCOL(eacf4), 4)
+ expect_false(identical(eacf1, eacf4)) # make sure results are not identical to full eacf
+
+ # works with sf object
+ exdata_sf <- sf::st_as_sf(exdata, coords = c("xcoord", "ycoord"))
+ expect_error(eacf(y ~ x, exdata_sf), NA)
+ eacf5 <- eacf(y ~ x, exdata_sf)
+ eacf5_cn <- eacf5
+ attr(eacf5_cn, "call") <- NULL
+ expect_true(identical(eacf1_cn, eacf5_cn))
+
+ # works with one dimension
+ expect_error(eacf(y ~ x, exdata, xcoord), NA)
+
+ # errors occur
+ expect_error(eacf(y ~ x, exdata))
+ expect_error(eacf(y ~ x, exdata, xcoord_xyz))
+ expect_error(eacf(y ~ x, exdata, xcoord, ycoord_xyz))
+
+ # works with cloud argument
+ eacf1c <- eacf(y ~ x, exdata, xcoord, ycoord, cloud = TRUE)
+ expect_s3_class(eacf1c, "eacf")
+ expect_s3_class(eacf1c, "tbl_df")
+ expect_s3_class(eacf1c, "tbl")
+ expect_s3_class(eacf1c, "data.frame")
+ expect_equal(NROW(eacf1c), 3965)
+ expect_equal(NCOL(eacf1c), 2)
+
+ # plot works
+ expect_error(plot(eacf1), NA)
+ expect_error(plot(eacf1, pch = 1), NA)
+ expect_error(plot(eacf1c), NA)
+ expect_error(plot(eacf1c, pch = 19), NA)
+ })
+
##############################################################################
############################ esv (test-esv.R)
##############################################################################
diff --git a/tests/testthat/test-spglm.R b/tests/testthat/test-spglm.R
index 85f7a0e..405383f 100644
--- a/tests/testthat/test-spglm.R
+++ b/tests/testthat/test-spglm.R
@@ -55,6 +55,9 @@ test_that("generics work spglm point data", {
# deviance
expect_vector(deviance(spmod1))
+ # eacf
+ expect_s3_class(eacf(y ~ x, exdata, xcoord = xcoord, ycoord = ycoord), "data.frame")
+
# esv
expect_s3_class(esv(y ~ x, exdata, xcoord = xcoord, ycoord = ycoord), "data.frame")
@@ -220,6 +223,9 @@ test_that("generics work spglm point data with missing", {
# deviance
expect_vector(deviance(spmod1))
+ # eacf
+ expect_s3_class(eacf(y ~ x, exdata_M, xcoord = xcoord, ycoord = ycoord), "data.frame")
+
# esv
expect_s3_class(esv(y ~ x, exdata_M, xcoord = xcoord, ycoord = ycoord), "data.frame")
@@ -380,6 +386,9 @@ test_that("generics work spglm polygon data with missing", {
# deviance
expect_vector(deviance(spmod1))
+ # eacf
+ expect_s3_class(eacf(y ~ x, exdata_Mpoly, xcoord = xcoord, ycoord = ycoord), "data.frame")
+
# esv
expect_s3_class(esv(y ~ x, exdata_Mpoly, xcoord = xcoord, ycoord = ycoord), "data.frame")
diff --git a/tests/testthat/test-splm.R b/tests/testthat/test-splm.R
index f07b6a9..3b1b3df 100644
--- a/tests/testthat/test-splm.R
+++ b/tests/testthat/test-splm.R
@@ -51,6 +51,9 @@ test_that("generics work splm point data", {
# deviance
expect_vector(deviance(spmod1))
+ # eacf
+ expect_s3_class(eacf(y ~ x, exdata, xcoord = xcoord, ycoord = ycoord), "data.frame")
+
# esv
expect_s3_class(esv(y ~ x, exdata, xcoord = xcoord, ycoord = ycoord), "data.frame")
@@ -213,6 +216,9 @@ test_that("generics work splm point data with missing", {
# deviance
expect_vector(deviance(spmod1))
+ # eacf
+ expect_s3_class(eacf(y ~ x, exdata_M, xcoord = xcoord, ycoord = ycoord), "data.frame")
+
# esv
expect_s3_class(esv(y ~ x, exdata_M, xcoord = xcoord, ycoord = ycoord), "data.frame")
@@ -370,6 +376,9 @@ test_that("generics work splm polygon data with missing", {
# deviance
expect_vector(deviance(spmod1))
+ # eacf
+ expect_s3_class(eacf(y ~ x, exdata_Mpoly, xcoord = xcoord, ycoord = ycoord), "data.frame")
+
# esv
expect_s3_class(esv(y ~ x, exdata_Mpoly, xcoord = xcoord, ycoord = ycoord), "data.frame")
From 0319e417f616cc589985e5cc52ac837dc430e84b Mon Sep 17 00:00:00 2001
From: Michael Dumelle
Date: Tue, 25 Nov 2025 12:09:08 -0800
Subject: [PATCH 5/9] update introductory vignette to include all five website
articles
---
vignettes/introduction.Rmd | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd
index 4bac858..38a6212 100644
--- a/vignettes/introduction.Rmd
+++ b/vignettes/introduction.Rmd
@@ -43,11 +43,13 @@ If you use `spmodel` in a formal publication or report, please cite it. Citing `
citation(package = "spmodel")
```
-There are three more `spmodel` vignettes available on our website at [https://usepa.github.io/spmodel/](https://usepa.github.io/spmodel/):
+There are five more `spmodel` vignettes available on our website at [https://usepa.github.io/spmodel/](https://usepa.github.io/spmodel/):
1. A Detailed Guide to `spmodel`
-2. Spatial Generalized Linear Models in `spmodel
-3. Technical Details
+2. Spatial Generalized Linear Models in `spmodel`
+3. Using `emmeans` to Estimate Marginal Means of `spmodel` Objects
+4. Block Prediction (i.e., Blocking Kriging) in `spmodel`
+5. Technical Details
Additionally, there are two workbooks that have accompanied recent `spmodel` workshops:
From 3ff815683e827dd7fbddc357cd09672644f192f5 Mon Sep 17 00:00:00 2001
From: Michael Dumelle
Date: Fri, 28 Nov 2025 17:07:25 -0800
Subject: [PATCH 6/9] update tech docs for eacf
---
vignettes/articles/technical.Rmd | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/vignettes/articles/technical.Rmd b/vignettes/articles/technical.Rmd
index 95c751d..e79de11 100644
--- a/vignettes/articles/technical.Rmd
+++ b/vignettes/articles/technical.Rmd
@@ -1330,14 +1330,20 @@ The following table contains a table of inverse link functions, $d_i$, and $D_{i
: A table of inverse link functions and relevant quantities for each spatial generalized linear model family.
-# The Empirical Semivariogram (`esv()`) {#sec:esv}
+# The Empirical Semivariogram and Autocovariance (`esv()` and `eacf()`) {#sec:esv}
-The empirical semivariogram is a moment-based estimate of the theoretical semivariogram. The empirical semivariogram quantifies half of the average squared difference in the response among observations in several distance classes. More formally, the empirical semivariogram is defined as \begin{equation}\label{eq:esv}
+The empirical semivariogram is a moment-based estimate of the theoretical semivariogram. The empirical semivariogram quantifies half of the average squared difference in the response among observations in several distance classes. More formally, the empirical semivariogram, $\hat{\gamma}(h)$, is defined as \begin{equation}\label{eq:esv}
\hat{\gamma}(h) = \frac{1}{2|N(h)|} \sum_{N(h)} (y_i - y_j)^2,
-\end{equation} where $N(h)$ is the set of observations in $\mathbf{y}$ that are $h$ distance apart (distance classes) and $|N(h)|$ is the cardinality of $N(h)$ [@cressie1993statistics]. Often the set $N(h)$ contains observations that are $h \pm c$ apart, where $c$ is some constant. This approach is known as "binning" the empirical semivariogram. The default in `spmodel` is to construct the semivariogram using 15 equally spaced bins where $h$ is contained in $(0, h_{max}]$, and $h_{max}$ is known as a "distance cutoff". Distance cutoffs are commonly used when constructing $\hat{\gamma}(h)$ because there tend to be few pairs with large distances. The default in `spmodel` is to use a cutoff of half the maximum distance (hypotenuse) of the domain's bounding box.
+\end{equation} where $N(h)$ is the set of observations in $\mathbf{y}$ that are $h$ distance apart (distance classes) and $|N(h)|$ is the cardinality of $N(h)$ [@cressie1993statistics]. Often the set $N(h)$ contains observations that are $h \pm c$ apart, where $c$ is some constant. This approach is known as "binning" the empirical semivariogram. The default in `spmodel` is to construct the semivariogram using 15 equally spaced bins where $h$ is contained in $(0, h_{max}]$, and $h_{max}$ is known as a "distance cutoff". Distance cutoffs are commonly used when constructing $\hat{\gamma}(h)$ because there tend to be few pairs with large distances. The default in `spmodel` is to use a cutoff of half the maximum distance (hypotenuse) of the domain's bounding box. Because we are typically interested in characterizing residual semivariance, $y_i$ and $y_j$ tend to represent residuals from an ordinary least squares fit given some fixed effects design matrix, $\mathbf{X}$, and fixed effects, $\hat{\boldsymbol{\beta}}$.
The main purpose of the empirical semivariogram is its use in semivariogram weighted least squares estimation for spatial linear models, though it can also be used as a visual diagnostic to assess the fit of a spatial covariance function.
+The empirical semivariogram is closely related to the empirical autocovariance [@zimmerman2024spatial]. The empirical autocovariance quantifies the average covariance among observations in several distance classes. Using similar notation as for the empirical semivariogram,
+\begin{equation}\label{eq:eacf}
+ \hat{eacf}(h) = \frac{1}{|N(h)|} \sum_{N(h)} (y_i - \hat{y})(y_j - \hat{y}),
+\end{equation}
+where $\hat{y} = \bar{y}$, the sample mean of all the observations. Like with `esv()`, the default is to use 15 equally spaced bins with a distance cutoff of half the maximum distance (hypotenuse) of the domain's bounding box. Because we are typically interested in characterizing residual autocovariance, $y_i - \hat{y}$ and $y_j - \hat{y}$ tend to represent residuals from an ordinary least squares fit given some fixed effects design matrix, $\mathbf{X}$, and fixed effects, $\hat{\boldsymbol{\beta}}$.
+
# A Note on Covariance Square Roots and Inverse Products {#sec:iprod}
Often $\boldsymbol{\Sigma}^{-1}$ is not strictly needed for estimation, prediction, or other purposes, but at least the product between $\boldsymbol{\Sigma}^{-1}$ and some other matrix is needed. Consider the example of the covariance matrix of $\hat{\boldsymbol{\beta}}$ and observe $\mathbf{X}^\top \boldsymbol{\Sigma}^{-1} \mathbf{X}$ is needed. The most direct way to find this product is certainly to obtain $\boldsymbol{\Sigma}^{-1}$ and then multiply by $\mathbf{X}^\top$ on the left and $\mathbf{X}$ on the right. This is both computationally expensive and cannot be used to compute products that involve $\boldsymbol{\Sigma}^{-1/2}$, which are often useful. It is helpful to define $\boldsymbol{\Sigma} = \mathbf{S} \mathbf{S}^\top$ for some matrix $\mathbf{S}$ and rewrite $\mathbf{X}^\top \boldsymbol{\Sigma}^{-1} \mathbf{X}$ as $\mathbf{X}^\top (\mathbf{S}^\top)^{-1} \mathbf{S}^{-1} \mathbf{X} = (\mathbf{S}^{-1} \mathbf{X})^\top \mathbf{S}^{-1} \mathbf{X}$. Then one computes the inverse products by finding $\mathbf{S}$.
From f481deeafa20c930754b12e65336ab9123d797b2 Mon Sep 17 00:00:00 2001
From: Michael Dumelle
Date: Wed, 10 Dec 2025 09:37:11 -0800
Subject: [PATCH 7/9] added warning message that clarifies xcoord and ycoord
are ignored when data is an sf object
---
NEWS.md | 1 +
R/get_data_object.R | 3 +++
R/get_data_object_glm.R | 3 +++
3 files changed, 7 insertions(+)
diff --git a/NEWS.md b/NEWS.md
index 0ff0240..41a80bf 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -8,6 +8,7 @@
* Changed the default `size` argument to the local argument in `predict(..., block = TRUE)` and `augment(..., block = TRUE)` from 1000 to 4000. This enhances the block prediction (i.e., Kriging) approximation's accuracy but can slightly increase computational complexity.
* Enhanced efficiency of block prediction (i.e., Kriging) for large `newdata` objects.
+* Added a warning message that clarifies `xcoord` and `ycoord` are ignored when `data` is an `sf` object.
* Minor documentation updates.
# spmodel 0.11.1
diff --git a/R/get_data_object.R b/R/get_data_object.R
index 8d319de..4f7107b 100644
--- a/R/get_data_object.R
+++ b/R/get_data_object.R
@@ -23,6 +23,9 @@ get_data_object_splm <- function(formula, data, spcov_initial, xcoord, ycoord, e
data_sf <- suppressWarnings(sf::st_centroid(data))
# store as data frame
data <- sf_to_df(data_sf)
+ if (!missing(xcoord) || !missing(ycoord)) {
+ warning("data is an sf object. Ignoring xcoord and ycoord arguments.", call. = FALSE)
+ }
## name xcoord ".xcoord" to be used later
xcoord <- ".xcoord"
## name ycoord ".ycoord" to be used later
diff --git a/R/get_data_object_glm.R b/R/get_data_object_glm.R
index f8b405f..65d1704 100644
--- a/R/get_data_object_glm.R
+++ b/R/get_data_object_glm.R
@@ -24,6 +24,9 @@ get_data_object_spglm <- function(formula, family, data, spcov_initial, xcoord,
data_sf <- suppressWarnings(sf::st_centroid(data))
# store as data frame
data <- sf_to_df(data_sf)
+ if (!missing(xcoord) || !missing(ycoord)) {
+ warning("data is an sf object. Ignoring xcoord and ycoord arguments.", call. = FALSE)
+ }
## name xcoord ".xcoord" to be used later
xcoord <- ".xcoord"
## name ycoord ".ycoord" to be used later
From 122eab355764f3ce4bc1b2ddb33d0ef4d2ffda98 Mon Sep 17 00:00:00 2001
From: Michael Dumelle
Date: Mon, 12 Jan 2026 13:42:10 -0800
Subject: [PATCH 8/9] fix bug in prediction that caused NA values with matern
spatial covariance when a location in newdata matches a location in data
---
NEWS.md | 4 ++++
R/spcov_matrix.R | 2 +-
R/spcov_vector.R | 2 ++
3 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/NEWS.md b/NEWS.md
index 41a80bf..2258d52 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -11,6 +11,10 @@
* Added a warning message that clarifies `xcoord` and `ycoord` are ignored when `data` is an `sf` object.
* Minor documentation updates.
+## Bug Fixes
+
+* Fixed a bug in `predict(object, newdata)` and `augment(object, newdata)` that could cause `NA` values when the spatial covariance was `"matern"` and a location in `newdata` was the exact same as a location in `data` (the `data` argument used to fit `object`).
+
# spmodel 0.11.1
## Minor Updates
diff --git a/R/spcov_matrix.R b/R/spcov_matrix.R
index 4e7dbd5..aaa563a 100644
--- a/R/spcov_matrix.R
+++ b/R/spcov_matrix.R
@@ -160,7 +160,7 @@ spcov_matrix.magnetic <- function(spcov_params, dist_matrix, diagtol = 0, ...) {
spcov_matrix.matern <- function(spcov_params, dist_matrix, diagtol = 0, ...) {
eta <- sqrt(2 * spcov_params[["extra"]]) * (dist_matrix / spcov_params[["range"]])
spcov_matrix_val <- spcov_params[["de"]] * 2^(1 - spcov_params[["extra"]]) / gamma(spcov_params[["extra"]]) * eta^spcov_params[["extra"]] * besselK(as.matrix(eta), nu = spcov_params[["extra"]]) # eta as sparse matrix causes error
- dist_matrix_zero <- which(dist_matrix == 0)
+ dist_matrix_zero <- which(dist_matrix == 0) # consider epsilon threshold instead of exactly zero?
spcov_matrix_val[dist_matrix_zero] <- spcov_params[["de"]]
spcov_params[["ie"]] <- max(spcov_params[["ie"]], 1e-4 * spcov_params[["de"]], diagtol)
diag(spcov_matrix_val) <- spcov_params[["de"]] + spcov_params[["ie"]]
diff --git a/R/spcov_vector.R b/R/spcov_vector.R
index 9eb941a..1b327f3 100644
--- a/R/spcov_vector.R
+++ b/R/spcov_vector.R
@@ -133,6 +133,8 @@ spcov_vector.magnetic <- function(spcov_params, dist_vector) {
spcov_vector.matern <- function(spcov_params, dist_vector) {
eta <- sqrt(2 * spcov_params[["extra"]]) * (dist_vector / spcov_params[["range"]])
spcov_vector_val <- spcov_params[["de"]] * 2^(1 - spcov_params[["extra"]]) / gamma(spcov_params[["extra"]]) * eta^spcov_params[["extra"]] * besselK(as.matrix(eta), nu = spcov_params[["extra"]])
+ dist_vector_zero <- which(dist_vector == 0) # consider epsilon threshold instead of exactly zero?
+ spcov_vector_val[dist_vector_zero] <- spcov_params[["de"]]
spcov_vector_val
}
From 6b2f971361a5baf53f26b53b6502f0e4b3e9a115 Mon Sep 17 00:00:00 2001
From: Michael Dumelle
Date: Mon, 26 Jan 2026 11:57:01 -0800
Subject: [PATCH 9/9] website updates
---
docs/404.html | 2 +-
docs/LICENSE.html | 2 +-
docs/articles/SPGLMs.html | 2 +-
docs/articles/block.html | 2 +-
docs/articles/emmeans.html | 2 +-
docs/articles/guide.html | 2 +-
docs/articles/index.html | 2 +-
docs/articles/introduction.html | 12 +-
docs/articles/technical.html | 30 ++-
docs/authors.html | 2 +-
docs/index.html | 2 +-
docs/news/index.html | 20 +-
docs/pkgdown.yml | 2 +-
docs/reference/AICc.html | 2 +-
docs/reference/AUROC.html | 2 +-
docs/reference/anova.spmodel.html | 2 +-
docs/reference/augment.spmodel.html | 2 +-
docs/reference/caribou.html | 2 +-
docs/reference/coef.spmodel.html | 2 +-
docs/reference/confint.spmodel.html | 2 +-
docs/reference/cooks.distance.spmodel.html | 2 +-
docs/reference/covmatrix.html | 2 +-
docs/reference/deviance.spmodel.html | 2 +-
docs/reference/dispersion_initial.html | 2 +-
docs/reference/dispersion_params.html | 2 +-
docs/reference/eacf-1.png | Bin 0 -> 34794 bytes
docs/reference/eacf.html | 235 +++++++++++++++++++++
docs/reference/esv.html | 2 +-
docs/reference/fc_borders.html | 2 +-
docs/reference/fitted.spmodel.html | 2 +-
docs/reference/formula.spmodel.html | 4 +-
docs/reference/glance.spmodel.html | 2 +-
docs/reference/glances.html | 2 +-
docs/reference/hatvalues.spmodel.html | 2 +-
docs/reference/index.html | 6 +-
docs/reference/influence.spmodel.html | 2 +-
docs/reference/labels.spmodel.html | 2 +-
docs/reference/lake.html | 2 +-
docs/reference/lake_preds.html | 2 +-
docs/reference/logLik.spmodel.html | 2 +-
docs/reference/loocv.html | 2 +-
docs/reference/model.frame.spmodel.html | 2 +-
docs/reference/model.matrix.spmodel.html | 2 +-
docs/reference/moose.html | 2 +-
docs/reference/moose_preds.html | 2 +-
docs/reference/moss.html | 2 +-
docs/reference/plot.spmodel.html | 2 +-
docs/reference/predict.spmodel.html | 5 +-
docs/reference/print.spmodel.html | 2 +-
docs/reference/pseudoR2.html | 2 +-
docs/reference/randcov_initial.html | 2 +-
docs/reference/randcov_params.html | 2 +-
docs/reference/reexports.html | 2 +-
docs/reference/residuals.spmodel.html | 2 +-
docs/reference/seal.html | 2 +-
docs/reference/spautor.html | 2 +-
docs/reference/spautorRF.html | 2 +-
docs/reference/spcov_initial.html | 2 +-
docs/reference/spcov_params.html | 2 +-
docs/reference/spgautor.html | 2 +-
docs/reference/spglm.html | 2 +-
docs/reference/splm.html | 2 +-
docs/reference/splmRF.html | 2 +-
docs/reference/spmodel-package.html | 2 +-
docs/reference/sprbeta.html | 2 +-
docs/reference/sprbinom.html | 2 +-
docs/reference/sprgamma.html | 2 +-
docs/reference/sprinvgauss.html | 2 +-
docs/reference/sprnbinom.html | 2 +-
docs/reference/sprnorm.html | 2 +-
docs/reference/sprpois.html | 2 +-
docs/reference/sulfate.html | 2 +-
docs/reference/sulfate_preds.html | 2 +-
docs/reference/summary.spmodel.html | 2 +-
docs/reference/texas.html | 2 +-
docs/reference/tidy.spmodel.html | 2 +-
docs/reference/varcomp.html | 2 +-
docs/reference/vcov.spmodel.html | 2 +-
docs/sitemap.xml | 3 +
79 files changed, 371 insertions(+), 84 deletions(-)
create mode 100644 docs/reference/eacf-1.png
create mode 100644 docs/reference/eacf.html
diff --git a/docs/404.html b/docs/404.html
index 75f8099..e49b0c4 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -39,7 +39,7 @@
spmodel
- 0.11.1
+ 0.12.0
diff --git a/docs/LICENSE.html b/docs/LICENSE.html
index 7b9ff1d..a4a1ae8 100644
--- a/docs/LICENSE.html
+++ b/docs/LICENSE.html
@@ -17,7 +17,7 @@
spmodel
- 0.11.1
+ 0.12.0
diff --git a/docs/articles/SPGLMs.html b/docs/articles/SPGLMs.html
index d2ebaea..ed777b2 100644
--- a/docs/articles/SPGLMs.html
+++ b/docs/articles/SPGLMs.html
@@ -40,7 +40,7 @@
spmodel
- 0.11.1
+ 0.12.0
diff --git a/docs/articles/block.html b/docs/articles/block.html
index 75a3637..0ff3f7b 100644
--- a/docs/articles/block.html
+++ b/docs/articles/block.html
@@ -40,7 +40,7 @@
spmodel
- 0.11.1
+ 0.12.0
diff --git a/docs/articles/emmeans.html b/docs/articles/emmeans.html
index a1a5b73..596b52f 100644
--- a/docs/articles/emmeans.html
+++ b/docs/articles/emmeans.html
@@ -40,7 +40,7 @@
spmodel
- 0.11.1
+ 0.12.0
diff --git a/docs/articles/guide.html b/docs/articles/guide.html
index 73d2589..2833809 100644
--- a/docs/articles/guide.html
+++ b/docs/articles/guide.html
@@ -40,7 +40,7 @@
spmodel
- 0.11.1
+ 0.12.0
diff --git a/docs/articles/index.html b/docs/articles/index.html
index 1691d7c..e3ff4bf 100644
--- a/docs/articles/index.html
+++ b/docs/articles/index.html
@@ -17,7 +17,7 @@
spmodel
- 0.11.1
+ 0.12.0
diff --git a/docs/articles/introduction.html b/docs/articles/introduction.html
index fa5eb64..37a1007 100644
--- a/docs/articles/introduction.html
+++ b/docs/articles/introduction.html
@@ -40,7 +40,7 @@
spmodel
- 0.11.1
+ 0.12.0
@@ -147,12 +147,18 @@
Using emmeans to Estimate Marginal Means of
+spmodel Objects
+
Block Prediction (i.e., Blocking Kriging) in
+spmodel
+
Technical Details
Additionally, there are two workbooks that have accompanied recent
diff --git a/docs/articles/technical.html b/docs/articles/technical.html
index 71d0404..89d8f97 100644
--- a/docs/articles/technical.html
+++ b/docs/articles/technical.html
@@ -40,7 +40,7 @@
spmodel
- 0.11.1
+ 0.12.0
@@ -3461,13 +3461,15 @@
Table of Inverse Link Functions,
-
The Empirical Semivariogram (esv())
+
The Empirical Semivariogram and Autocovariance (esv()
+and eacf())
The empirical semivariogram is a moment-based estimate of the
theoretical semivariogram. The empirical semivariogram quantifies half
of the average squared difference in the response among observations in
-several distance classes. More formally, the empirical semivariogram is
-defined as \[\begin{equation}\label{eq:esv}
+several distance classes. More formally, the empirical semivariogram,
+\(\hat{\gamma}(h)\), is defined as
+\[\begin{equation}\label{eq:esv}
\hat{\gamma}(h) = \frac{1}{2|N(h)|} \sum_{N(h)} (y_i - y_j)^2,
\end{equation}\] where \(N(h)\)
is the set of observations in \(\mathbf{y}\) that are \(h\) distance apart (distance classes) and
@@ -3481,11 +3483,29 @@
because there tend to be
few pairs with large distances. The default in spmodel is
to use a cutoff of half the maximum distance (hypotenuse) of the
-domain’s bounding box.
+domain’s bounding box. Because we are typically interested in
+characterizing residual semivariance, \(y_i\) and \(y_j\) tend to represent residuals from an
+ordinary least squares fit given some fixed effects design matrix, \(\mathbf{X}\), and fixed effects, \(\hat{\boldsymbol{\beta}}\).
The main purpose of the empirical semivariogram is its use in
semivariogram weighted least squares estimation for spatial linear
models, though it can also be used as a visual diagnostic to assess the
fit of a spatial covariance function.
+
The empirical semivariogram is closely related to the empirical
+autocovariance (Zimmerman and Ver Hoef
+2024). The empirical autocovariance quantifies the average
+covariance among observations in several distance classes. Using similar
+notation as for the empirical semivariogram, \[\begin{equation}\label{eq:eacf}
+ \hat{eacf}(h) = \frac{1}{|N(h)|} \sum_{N(h)} (y_i - \hat{y})(y_j -
+\hat{y}),
+\end{equation}\] where \(\hat{y} =
+\bar{y}\), the sample mean of all the observations. Like with
+esv(), the default is to use 15 equally spaced bins with a
+distance cutoff of half the maximum distance (hypotenuse) of the
+domain’s bounding box. Because we are typically interested in
+characterizing residual autocovariance, \(y_i
+- \hat{y}\) and \(y_j -
+\hat{y}\) tend to represent residuals from an ordinary least
+squares fit given some fixed effects design matrix, \(\mathbf{X}\), and fixed effects, \(\hat{\boldsymbol{\beta}}\).
A Note on Covariance Square Roots and Inverse Products
diff --git a/docs/authors.html b/docs/authors.html
index 836bdb4..79dde4a 100644
--- a/docs/authors.html
+++ b/docs/authors.html
@@ -17,7 +17,7 @@
spmodel
- 0.11.1
+ 0.12.0
Added eacf() to compute the empirical autocovariance function.
+
+
+
Minor Updates
+
Changed the default size argument to the local argument in predict(..., block = TRUE) and augment(..., block = TRUE) from 1000 to 4000. This enhances the block prediction (i.e., Kriging) approximation’s accuracy but can slightly increase computational complexity.
+
Enhanced efficiency of block prediction (i.e., Kriging) for large newdata objects.
+
Added a warning message that clarifies xcoord and ycoord are ignored when data is an sf object.
+
Minor documentation updates.
+
+
+
Bug Fixes
+
Fixed a bug in predict(object, newdata) and augment(object, newdata) that could cause NA values when the spatial covariance was "matern" and a location in newdata was the exact same as a location in data (the data argument used to fit object).
A data frame or sf object containing the variables in formula
+and geographic information.
+
+
+
xcoord
+
Name of the variable in data representing the x-coordinate.
+Can be quoted or unquoted. Not required if data is an sf object.
+
+
+
ycoord
+
Name of the variable in data representing the y-coordinate.
+Can be quoted or unquoted. Not required if data is an sf object.
+
+
+
cloud
+
A logical indicating whether the empirical autocovariance should
+be summarized by distance class or not. When cloud = FALSE (the default), pairwise autocovariances
+are binned and averaged within distance classes. When cloud = TRUE,
+all pairwise autocovariances and distances are returned (this is known as
+the "cloud" autocovariance).
+
+
+
bins
+
The number of equally spaced bins. The default is 15. Ignored if
+cloud = TRUE.
+
+
+
cutoff
+
The maximum distance considered.
+The default is half the diagonal of the bounding box from the coordinates.
+
+
+
dist_matrix
+
A distance matrix to be used instead of providing coordinate names.
+
+
+
partition_factor
+
An optional formula specifying the partition factor.
+If specified, autocovariances are only computed for observations sharing the
+same level of the partition factor.
+
+
+
x
+
An object from eacf().
+
+
+
...
+
Other arguments passed to other methods.
+
+
+
+
Value
+
+
+
If cloud = FALSE, a tibble (data.frame) with distance bins
+(bins), the average distance (dist), the average autocovariance (acov), and the
+number of (unique) pairs (np). If cloud = TRUE, a tibble
+(data.frame) with distance (dist) and autocovariance (acov)
+for each unique pair.
+
+
+
Details
+
The empirical autocovariance (i.e., empirical covariance) is a tool used to visualize and model
+spatial dependence by estimating the semivariance of a process at varying distances.
+For a constant-mean process, the
+autocovariance at distance \(h\) is denoted \(Cov(h)\) and defined as
+\(Cov(z1, z2)\). Under second-order stationarity,
+\(Cov(h) = Cov(0) - \gamma(h)\), where \(gamma(h)\) is the semivariance function at distance h. Typically the residuals from an ordinary
+least squares fit defined by formula are second-order stationary with
+mean zero. These residuals are used to compute the empirical autocovariance
+At a distance h, the empirical autocovariance is
+\(1/N(h) \sum (r1 \times r2)\), where \(N(h)\) is the number of (unique)
+pairs in the set of observations whose distance separation is h and
+r1 and r2 are residuals corresponding to observations whose
+distance separation is h. In spmodel, these distance bins actually
+contain observations whose distance separation is h +- c,
+where c is a constant determined implicitly by bins. Typically,
+only observations whose distance separation is below some cutoff are used
+to compute the empirical semivariogram (this cutoff is determined by cutoff).
If block is TRUE, local accepts method and size,
and method takes values of "all", "covariance",
and "distance", similar as when block is FALSE.
-The default method is "distance" with size 1000. This default size is
+The default method is "covariance" with size 4000. This default size is
much larger than when block is FALSE. This is because when block is TRUE,
covariances and explanatory variables are averaged before prediction, which greatly
reduces computational burden, only requiring the Cholesky decomposition
@@ -418,6 +418,7 @@