Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: spmodel
Title: Spatial Statistical Modeling and Prediction
Version: 0.11.1
Version: 0.12.0
Authors@R: c(
person(given = "Michael",
family = "Dumelle",
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ S3method(model.matrix,spautor)
S3method(model.matrix,spgautor)
S3method(model.matrix,spglm)
S3method(model.matrix,splm)
S3method(plot,eacf)
S3method(plot,esv)
S3method(plot,spautor)
S3method(plot,spgautor)
Expand Down Expand Up @@ -359,6 +360,7 @@ export(augment)
export(covmatrix)
export(dispersion_initial)
export(dispersion_params)
export(eacf)
export(esv)
export(glance)
export(glances)
Expand Down
17 changes: 17 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,20 @@
# spmodel 0.12.0

## Major Updates

* Added `eacf()` to compute the empirical autocovariance function.

## Minor Updates

* Changed the default `size` argument to the local argument in `predict(..., block = TRUE)` and `augment(..., block = TRUE)` from 1000 to 4000. This enhances the block prediction (i.e., Kriging) approximation's accuracy but can slightly increase computational complexity.
* Enhanced efficiency of block prediction (i.e., Kriging) for large `newdata` objects.
* Added a warning message that clarifies `xcoord` and `ycoord` are ignored when `data` is an `sf` object.
* Minor documentation updates.

## Bug Fixes

* Fixed a bug in `predict(object, newdata)` and `augment(object, newdata)` that could cause `NA` values when the spatial covariance was `"matern"` and a location in `newdata` was the exact same as a location in `data` (the `data` argument used to fit `object`).

# spmodel 0.11.1

## Minor Updates
Expand Down
204 changes: 204 additions & 0 deletions R/eacf.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
#' Compute the empirical autocovariance
#'
#' @description Compute the empirical autocovariance (i.e., empirical covariance)
#' for varying bin sizes and cutoff values.
#'
#' @param formula A formula describing the fixed effect structure.
#' @param data A data frame or \code{sf} object containing the variables in \code{formula}
#' and geographic information.
#' @param xcoord Name of the variable in \code{data} representing the x-coordinate.
#' Can be quoted or unquoted. Not required if \code{data} is an \code{sf} object.
#' @param ycoord Name of the variable in \code{data} representing the y-coordinate.
#' Can be quoted or unquoted. Not required if \code{data} is an \code{sf} object.
#' @param cloud A logical indicating whether the empirical autocovariance should
#' be summarized by distance class or not. When \code{cloud = FALSE} (the default), pairwise autocovariances
#' are binned and averaged within distance classes. When \code{cloud} = TRUE,
#' all pairwise autocovariances and distances are returned (this is known as
#' the "cloud" autocovariance).
#' @param bins The number of equally spaced bins. The default is 15. Ignored if
#' \code{cloud = TRUE}.
#' @param cutoff The maximum distance considered.
#' The default is half the diagonal of the bounding box from the coordinates.
#' @param dist_matrix A distance matrix to be used instead of providing coordinate names.
#' @param partition_factor An optional formula specifying the partition factor.
#' If specified, autocovariances are only computed for observations sharing the
#' same level of the partition factor.
#'
#' @details The empirical autocovariance (i.e., empirical covariance) is a tool used to visualize and model
#' spatial dependence by estimating the semivariance of a process at varying distances.
#' For a constant-mean process, the
#' autocovariance at distance \eqn{h} is denoted \eqn{Cov(h)} and defined as
#' \eqn{Cov(z1, z2)}. Under second-order stationarity,
#' \eqn{Cov(h) = Cov(0) - \gamma(h)}, where \eqn{gamma(h)} is the semivariance function at distance \code{h}. Typically the residuals from an ordinary
#' least squares fit defined by \code{formula} are second-order stationary with
#' mean zero. These residuals are used to compute the empirical autocovariance
#' At a distance \code{h}, the empirical autocovariance is
#' \eqn{1/N(h) \sum (r1 \times r2)}, where \eqn{N(h)} is the number of (unique)
#' pairs in the set of observations whose distance separation is \code{h} and
#' \code{r1} and \code{r2} are residuals corresponding to observations whose
#' distance separation is \code{h}. In spmodel, these distance bins actually
#' contain observations whose distance separation is \code{h +- c},
#' where \code{c} is a constant determined implicitly by \code{bins}. Typically,
#' only observations whose distance separation is below some cutoff are used
#' to compute the empirical semivariogram (this cutoff is determined by \code{cutoff}).
#'
#' @name eacf
#'
#' @return If \code{cloud = FALSE}, a tibble (data.frame) with distance bins
#' (\code{bins}), the average distance (\code{dist}), the average autocovariance (\code{acov}), and the
#' number of (unique) pairs (\code{np}). If \code{cloud = TRUE}, a tibble
#' (data.frame) with distance (\code{dist}) and autocovariance (\code{acov})
#' for each unique pair.
#'
#' @export
#'
#' @examples
#' eacf(sulfate ~ 1, sulfate)
#' plot(eacf(sulfate ~ 1, sulfate))
eacf <- function(formula, data, xcoord, ycoord, cloud = FALSE, bins = 15, cutoff, dist_matrix, partition_factor) {

# filter out missing response values
na_index <- is.na(data[[all.vars(formula)[1]]])
data <- data[!na_index, , drop = FALSE]
# finding model frame
data_model_frame <- model.frame(formula, data, drop.unused.levels = TRUE, na.action = na.pass)
# model matrix with potential NA
ob_predictors <- complete.cases(model.matrix(formula, data_model_frame))
if (any(!ob_predictors)) {
stop("Cannot have NA values in predictors.", call. = FALSE)
}

# covert sp to sf
attr_sp <- attr(class(data), "package")
if (!is.null(attr_sp) && length(attr_sp) == 1 && attr_sp == "sp") {
stop("sf objects must be used instead of sp objects. To convert your sp object into an sf object, run sf::st_as_sf().", call. = FALSE)
}

## convert sf to data frame (point geometry) (1d objects obsolete)
### see if data has sf class
if (inherits(data, "sf")) {
data <- suppressWarnings(sf::st_centroid(data))
data <- sf_to_df(data)
### name xcoord ".xcoord" to be used later
xcoord <- ".xcoord"
### name ycoord ".ycoord" to be used later
ycoord <- ".ycoord"
}

# compute spatial distances
if (missing(dist_matrix)) {
# non standard evaluation for the x and y coordinates
xcoord <- substitute(xcoord)
ycoord <- substitute(ycoord)

if (missing(xcoord)) {
stop("The xcoord argument must be specified.", call. = FALSE)
}

if (!missing(xcoord)) {
if (!as.character(xcoord) %in% colnames(data)) {
stop("The xcoord argument must match the name of a variable in data.", call. = FALSE)
}
}

if (missing(ycoord)) {
dist_matrix <- spdist(data, xcoord)
} else {
if (!as.character(ycoord) %in% colnames(data)) {
stop("The ycoord argument must match the name of a variable in data.", call. = FALSE)
}
dist_matrix <- spdist(data, xcoord, ycoord)
}
}


dist_matrix <- as.matrix(dist_matrix)
dist_matrix <- dist_matrix[upper.tri(dist_matrix)]

if (any(dist_matrix == 0)) {
warning("Zero distances observed between at least one pair. Ignoring pairs. If using splm(), consider a different estimation method.", call. = FALSE)
}

if (missing(partition_factor)) {
partition_factor <- NULL
}

if (!is.null(partition_factor)) {
# partition_matrix_val <- triu(partition_matrix(partition_factor, data = data), k = 1)
partition_matrix_val <- as.matrix(partition_matrix(partition_factor, data = data))
partition_matrix_val <- partition_matrix_val[upper.tri(partition_matrix_val)]
dist_matrix <- dist_matrix * partition_matrix_val
}

if (missing(cutoff)) {
cutoff <- NULL
}
if (is.null(cutoff)) {
cutoff <- max(dist_matrix) / 2
}

dist_vector <- dist_matrix
if (any(dist_vector == 0)) {
dist_index <- dist_vector > 0 & dist_vector <= cutoff
} else {
dist_index <- dist_vector <= cutoff
}

dist_vector <- dist_vector[dist_index]

# compute squared differences in the residuals
lmod <- lm(formula = formula, data = data)
residuals <- residuals(lmod)
residual_matrix <- outer(residuals, residuals) # ybar is the fitted mean
residual_matrix <- residual_matrix[upper.tri(residual_matrix)]
if (!is.null(partition_factor)) {
residual_matrix <- residual_matrix * partition_matrix_val
}

residual_vector <- residual_matrix
residual_vector <- residual_vector[dist_index]
residual_vector2 <- residual_vector # just to align with esv() names


if (cloud) {
eacf_out <- get_eacf_cloud(residual_vector2, dist_vector)
} else {
eacf_out <- get_eacf(residual_vector2, dist_vector, bins, cutoff)
}




# remove NA
# eacf_out <- na.omit(eacf_out)
eacf_out <- structure(eacf_out, class = c("eacf", class(eacf_out)), call = match.call(), cloud = cloud)
eacf_out
}

#' @rdname eacf
#' @method plot eacf
#' @param x An object from \code{eacf()}.
#' @param ... Other arguments passed to other methods.
#' @export
plot.eacf <- function(x, ...) {

cal <- attr(x, "call")
if (!is.na(m.f <- match("formula", names(cal)))) {
cal <- cal[c(1, m.f)]
names(cal)[2L] <- ""
}
cc <- deparse(cal, 80)
nc <- nchar(cc[1L], "c")
abbr <- length(cc) > 1 || nc > 75
sub.caption <- if (abbr) {
paste(substr(cc[1L], 1L, min(75L, nc)), "...")
} else {
cc[1L]
}

dotlist <- list(...)
dotlist <- get_eacf_dotlist_defaults(x, dotlist, cloud = attr(x, "cloud"))

do.call("plot", c(list(x = x$dist, y = x$acov), dotlist))
title(sub = sub.caption)
}
2 changes: 1 addition & 1 deletion R/esv.R
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ esv <- function(formula, data, xcoord, ycoord, cloud = FALSE, robust = FALSE, bi
residual_vector2 <- residual_vector^2

if (cloud) {
esv_out <- get_esv_cloud(residual_vector2, dist_vector, formula)
esv_out <- get_esv_cloud(residual_vector2, dist_vector)
} else {
if (robust) {
residual_vector12 <- sqrt(residual_vector)
Expand Down
3 changes: 3 additions & 0 deletions R/get_data_object.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ get_data_object_splm <- function(formula, data, spcov_initial, xcoord, ycoord, e
data_sf <- suppressWarnings(sf::st_centroid(data))
# store as data frame
data <- sf_to_df(data_sf)
if (!missing(xcoord) || !missing(ycoord)) {
warning("data is an sf object. Ignoring xcoord and ycoord arguments.", call. = FALSE)
}
## name xcoord ".xcoord" to be used later
xcoord <- ".xcoord"
## name ycoord ".ycoord" to be used later
Expand Down
3 changes: 3 additions & 0 deletions R/get_data_object_glm.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ get_data_object_spglm <- function(formula, family, data, spcov_initial, xcoord,
data_sf <- suppressWarnings(sf::st_centroid(data))
# store as data frame
data <- sf_to_df(data_sf)
if (!missing(xcoord) || !missing(ycoord)) {
warning("data is an sf object. Ignoring xcoord and ycoord arguments.", call. = FALSE)
}
## name xcoord ".xcoord" to be used later
xcoord <- ".xcoord"
## name ycoord ".ycoord" to be used later
Expand Down
41 changes: 41 additions & 0 deletions R/get_eacf.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
get_eacf <- function(residual_vector2, dist_vector, bins, cutoff, formula) {

# compute semivariogram classes
dist_classes <- cut(dist_vector, breaks = seq(0, cutoff, length.out = bins + 1))

# compute squared differences within each class
acov <- tapply(residual_vector2, dist_classes, function(x) mean(x))

# compute pairs within each class
np <- tapply(residual_vector2, dist_classes, length)

# set as zero if necessary
np <- ifelse(is.na(np), 0, np)

# compute average distance within each class
dist <- tapply(dist_vector, dist_classes, mean)

# return output
eacf_out <- tibble::tibble(
bins = factor(levels(dist_classes), levels = levels(dist_classes)),
dist = as.numeric(dist),
acov = as.numeric(acov),
np = as.numeric(np)
)

# set row names to NULL
# row.names(eacf_out) <- NULL

eacf_out
}


get_eacf_cloud <- function(residual_vector2, dist_vector) {

eacf_out <- tibble::tibble(dist = dist_vector, acov = residual_vector2)

# set row names to NULL
# row.names(eacf_out) <- NULL

eacf_out
}
68 changes: 68 additions & 0 deletions R/get_eacf_dotlist.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#' Get empirical autocovariance dotlist
#'
#' @param ... Additional arguments to \code{eacf()}
#' @param max_halfdist
#'
#' @return An eacf dotlist
#'
#' @noRd
get_eacf_dotlist <- function(..., max_halfdist) {
# storing dotlist and setting defaults for eacf
dotlist <- list(...)

if (!("bins" %in% names(dotlist))) {
dotlist$bins <- 15
}

if (!("cutoff" %in% names(dotlist))) {
dotlist$cutoff <- max_halfdist
}

# make dotlist eacf
dotlist_eacf <- list(bins = dotlist$bins, cutoff = dotlist$cutoff)
}


get_eacf_dotlist_defaults <- function(x, dotlist, cloud) {

names_dotlist <- names(dotlist)

# set defaults
if (!"main" %in% names_dotlist) {
dotlist$main <- "Empirical Autocovariance"
if (cloud) dotlist$main <- paste0(dotlist$main, " (Cloud)")
}

if (!"xlab" %in% names_dotlist) {
dotlist$xlab <- "Distance"
}

if (!"ylab" %in% names_dotlist) {
dotlist$ylab <- "Autocovariance"
}

if (!cloud && !"pch" %in% names_dotlist) {
dotlist$pch <- 19
}

if (!cloud && !"cex" %in% names_dotlist) {
dotlist$cex <- (x$np - min(x$np)) / (max(x$np) - min(x$np)) * 2 + 1
}

if (!"ylim" %in% names_dotlist) {

# include zero if not in limits

## all greater than zero (positive)
if (all(x$acov > 0)) {
dotlist$ylim <- c(0, 1.1 * max(x$acov))
}

## all less than zero (negative)
if (all(x$acov < 0)) {
dotlist$ylim <- c(1.1 * min(x$acov), 0)
}
}

dotlist
}
2 changes: 1 addition & 1 deletion R/get_esv.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ get_esv_robust <- function(residual_vector12, dist_vector, bins, cutoff, formula
esv_out
}

get_esv_cloud <- function(residual_vector2, dist_vector, formula) {
get_esv_cloud <- function(residual_vector2, dist_vector) {

esv_out <- tibble::tibble(dist = dist_vector, gamma = residual_vector2 / 2)

Expand Down
Loading
Loading