diff --git a/DESCRIPTION b/DESCRIPTION index bf4fec0..f3111b5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: spmodel Title: Spatial Statistical Modeling and Prediction -Version: 0.10.0 +Version: 0.11.0 Authors@R: c( person(given = "Michael", family = "Dumelle", diff --git a/NEWS.md b/NEWS.md index d928390..3b0956c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,19 @@ +# spmodel 0.11.0 + +## Major Updates + +* Added support for Block Prediction (i.e., Block Kriging) to estimate averages (and their uncertainties) over a geographic region. +* Added a vignette to the [`spmodel` website](https://usepa.github.io/spmodel/) titled "Block Prediction (i.e., Block Kriging) in spmodel". +* Added a `fc_borders` data set which contains borders for the Four Corners states in the United States. + +## Bug Fixes + +* Fixed a bug that incorrectly calculated the log determinant of the fixed effects in the restricted log likelihood. +* Fixed a bug that caused an error when `covmatrix(..., cov_type = "pred.pred")` was called on `object` with a non-`NULL` `newdata` element. + # spmodel 0.10.0 -## Minor updates +## Minor Updates * Added a robust semivariogram option to `esv()`; see the `robust`argument to `esv()` ([#28](https://github.com/USEPA/spmodel/issues/28)). * Added `"none"` and `"ie"` spatial covariance types (via `spcov_type` or `spcov_initial`) to `spautor()`, `spgautor()`, and `spautorRF()` ([#27](https://github.com/USEPA/spmodel/issues/27)). diff --git a/R/covmatrix.R b/R/covmatrix.R index efe25a2..866f855 100644 --- a/R/covmatrix.R +++ b/R/covmatrix.R @@ -65,7 +65,7 @@ covmatrix.splm <- function(object, newdata, cov_type, ...) { if (cov_type == "pred.pred") { if (inherits(object$newdata, "sf")) { - object$obdata <- sf_to_df(newdata) + object$obdata <- sf_to_df(object$newdata) } else { object$obdata <- newdata } diff --git a/R/data.R b/R/data.R index 5ee5143..1566e1a 100644 --- a/R/data.R +++ b/R/data.R @@ -211,3 +211,23 @@ #' @source #' The data source is the \code{elect80} data set in the \code{spData} R package. "texas" + + +#' Four Corners State Borders +#' +#' @description State borders for the four corners states in the United States: +#' Arizona, Colorado, New Mexico, Utah. +#' +#' @format An \code{sf} object with 4 rows and 4 columns: +#' +#' \itemize{ +#' \item NAME: State name. +#' \item STUSPS: State postal code. +#' \item GEO.ID: State GEO.ID from the United States Census Bureau. +#' \item geometry: \code{POINT} geometry representing coordinates in a NAD83 +#' projection (EPSG: 5070). Distances between points are in meters. +#' } +#' @source +#' The data source is the United States Census Bureau TIGER/Line Shapefiles. +"fc_borders" +# start with us_borders <- tigris::states(cb = TRUE, year = 2024) diff --git a/R/dispersion_optim2orig.R b/R/dispersion_optim2orig.R index e7541ea..9e2c084 100644 --- a/R/dispersion_optim2orig.R +++ b/R/dispersion_optim2orig.R @@ -10,6 +10,9 @@ dispersion_optim2orig <- function(dispersion_orig2optim, par) { } else { fill_optim_par_val <- par[length(par)] # dispersion is the last element fill_orig_val <- exp(fill_optim_par_val) + # cap lower and upper dispersion values for numeric stability + fill_orig_val <- pmax(1e-8, fill_orig_val) + fill_orig_val <- pmin(1e8, fill_orig_val) names(fill_orig_val) <- "dispersion" new_par <- par[-length(par)] # remove dispersion parameter } diff --git a/R/dispersion_orig2optim.R b/R/dispersion_orig2optim.R index bae76c0..f22b005 100644 --- a/R/dispersion_orig2optim.R +++ b/R/dispersion_orig2optim.R @@ -5,6 +5,7 @@ dispersion_orig2optim <- function(dispersion_initial) { names(dispersion_orig2optim_is_known) <- "dispersion_log" # return dispersion parameter vector + # can consider lower limit like -10, 10 for numerical stability dispersion_orig2optim_val <- ifelse(dispersion_orig2optim_val > 50 & !dispersion_orig2optim_is_known, 50, dispersion_orig2optim_val) dispersion_orig2optim_val <- ifelse(dispersion_orig2optim_val < -50 & !dispersion_orig2optim_is_known, -50, dispersion_orig2optim_val) diff --git a/R/get_local_list.R b/R/get_local_list.R index 0794922..052e83b 100644 --- a/R/get_local_list.R +++ b/R/get_local_list.R @@ -156,3 +156,44 @@ get_local_list_prediction <- function(local) { local } + + + + +get_local_list_prediction_block <- function(local) { + + # set local neighborhood size + # method can be "all" (for all data), "distance" (for local distance neighborhoods) + # or "covariance" (for local covariance neighborhoods) + + if (is.logical(local)) { + if (local) { + local <- list(method = "covariance", size = 1000) + } else { + local <- list(method = "all") + } + } + + names_local <- names(local) + + # errors + if ("method" %in% names_local) { + if (!local$method %in% c("all", "covariance", "distance")) { + stop("Invalid local method. Local method must be \"all\", \"covariance\", or \"distance\".", call. = FALSE) + } + } + + + if (!"method" %in% names_local) { + # local$method <- "all" + local$method <- "covariance" + } + + if (local$method %in% c("distance", "covariance") && !"size" %in% names_local) { + local$size <- 1000 + } + + # no parallel required + + local +} diff --git a/R/laploglik_products.R b/R/laploglik_products.R index ba23289..8400b6e 100644 --- a/R/laploglik_products.R +++ b/R/laploglik_products.R @@ -229,6 +229,7 @@ get_w_and_H_spglm <- function(data_object, dispersion, SigInv_list, SigInv_X, co # Next, compute H D <- get_D(family, w, y, size, dispersion) H <- D - Ptheta # not PD but -H is + # can consider changing tol here for numeric stability solveHg <- solve(H, g) wnew <- w - solveHg # mH_upchol <- chol(Matrix::forceSymmetric(-H)) @@ -342,6 +343,7 @@ get_w_and_H_spgautor <- function(data_object, dispersion, SigInv, SigInv_X, cov_ # Next, compute H D <- get_D(family, w, y, size, dispersion) H <- D - Ptheta # not PD but -H is + # can consider changing tol here for numeric stability solveHg <- solve(H, g) wnew <- w - solveHg # mH_upchol <- chol(Matrix::forceSymmetric(-H)) diff --git a/R/predict.R b/R/predict.R index 20ad711..d011a0c 100644 --- a/R/predict.R +++ b/R/predict.R @@ -69,10 +69,25 @@ #' If \code{local} is \code{TRUE}, defaults for \code{local} are chosen such #' that \code{local} is transformed into #' \code{list(size = 100, method = "covariance", parallel = FALSE)}. +#' +#' If \code{block} is \code{TRUE}, \code{local} accepts \code{method} and \code{size}, +#' and \code{method} takes values of \code{"all"}, \code{"covariance"}, +#' and \code{"distance"}, similar as when \code{block} is \code{FALSE}. +#' The default \code{method} is \code{"distance"} with size \code{1000}. This default \code{size} is +#' much larger than when \code{block} is \code{FALSE}. This is because when \code{block} is \code{TRUE}, +#' covariances and explanatory variables are averaged before prediction, which greatly +#' reduces computational burden, only requiring the Cholesky decomposition +#' of one observed covariance matrix. Because the computational burden is reduced dramatically when \code{block} is \code{TRUE}, +#' parallel processing is not needed and hence, \code{parallel} and \code{ncores} are ignored if specified in \code{local}. #' @param terms If \code{type} is \code{"terms"}, the type of terms to be returned, #' specified via either numeric position or name. The default is all terms are included. #' @param na.action Missing (\code{NA}) values in \code{newdata} will return an error and should #' be removed before proceeding. +#' @param block A logical indicating whether a block prediction over the entire region +#' in \code{newdata} should be returned. When \code{block} is \code{TRUE}, +#' \code{newdata} should be a dense grid of prediction locations that span +#' the entire region. The default is \code{FALSE}, which +#' returns point predictions for each location on \code{newdata}. #' @param ... Other arguments. Only used for models fit using \code{splmRF()} #' or \code{spautorRF()} where \code{...} indicates other #' arguments to \code{ranger::predict.ranger()}. @@ -122,7 +137,8 @@ #' predict(spmod, sulfate_preds, interval = "prediction") #' augment(spmod, newdata = sulfate_preds, interval = "prediction") predict.splm <- function(object, newdata, se.fit = FALSE, scale = NULL, df = Inf, interval = c("none", "confidence", "prediction"), - level = 0.95, type = c("response", "terms"), local, terms = NULL, na.action = na.fail, ...) { + level = 0.95, type = c("response", "terms"), block = FALSE, local, terms = NULL, na.action = na.fail, ...) { + # match interval argument so the three display interval <- match.arg(interval) @@ -133,6 +149,11 @@ predict.splm <- function(object, newdata, se.fit = FALSE, scale = NULL, df = Inf local <- NULL } + if (block) { + object <- predict_block_splm(object, newdata, se.fit, scale, df, interval, level, type, local, terms, na.action, ...) + return(object) + } + # check scale is numeric (if specified) if (!is.null(scale) && !is.numeric(scale)) { stop("scale must be numeric.", call. = FALSE) diff --git a/R/predict_block.R b/R/predict_block.R new file mode 100644 index 0000000..bf458f4 --- /dev/null +++ b/R/predict_block.R @@ -0,0 +1,236 @@ +predict_block_splm <- function(object, newdata, se.fit, scale, df, interval, level, type, local, terms, na.action, ...) { + + + + # deal with local + if (missing(local)) { + local <- NULL + } + + # check scale is numeric (if specified) + if (!is.null(scale) && !is.numeric(scale)) { + stop("scale must be numeric.", call. = FALSE) + } + + # handle na action -- this is an inefficient workaround that should be fixed later + # placeholder as a reminder to consider adding na.action argument at a later date + # na_action <- as.character(substitute(na.action)) + + # error if newdata missing from arguments and object + if (missing(newdata) && is.null(object$newdata)) { + stop("No missing data to predict. newdata must be specified in the newdata argument or object$newdata must be non-NULL.", call. = FALSE) + } + + # rename relevant quantities + obdata <- object$obdata + xcoord <- object$xcoord + ycoord <- object$ycoord + + # write newdata if predicting missing data + if (missing(newdata)) { + add_newdata_rows <- TRUE + newdata <- object$newdata + } else { + add_newdata_rows <- FALSE + } + + # deal with local + if (is.null(local)) { + if (object$n > 10000) { + # if (object$n > 5000 || NROW(newdata) > 5000) { + local <- TRUE + message("Because the sample size of the fitted model object exceeds 10,000, we are setting local = TRUE to perform computationally efficient approximations. To override this behavior and compute the exact solution, rerun predict() with local = FALSE. Be aware that setting local = FALSE may result in exceedingly long computational times.") + } else { + local <- FALSE + } + } + + # save spcov param vector + spcov_params_val <- coef(object, type = "spcov") + + # save randcov param vector + randcov_params_val <- coef(object, type = "randcov") + + + attr_sp <- attr(class(newdata), "package") + if (!is.null(attr_sp) && length(attr_sp) == 1 && attr_sp == "sp") { + stop("sf objects must be used instead of sp objects. To convert your sp object into an sf object, run sf::st_as_sf().", call. = FALSE) + } + + if (inherits(newdata, "sf")) { + newdata <- suppressWarnings(sf::st_centroid(newdata)) + + newdata <- sf_to_df(newdata) + names(newdata)[[which(names(newdata) == ".xcoord")]] <- as.character(xcoord) # only relevant if newdata is sf data is not + names(newdata)[[which(names(newdata) == ".ycoord")]] <- as.character(ycoord) # only relevant if newdata is sf data is not + } + + # add back in zero column to cover anisotropy (should make anisotropy only available 1-d) + if (object$dim_coords == 1) { + obdata[[ycoord]] <- 0 + newdata[[ycoord]] <- 0 + } + + if (object$anisotropy) { # could just do rotate != 0 || scale != 1 + obdata_aniscoords <- transform_anis(obdata, xcoord, ycoord, + rotate = spcov_params_val[["rotate"]], + scale = spcov_params_val[["scale"]] + ) + obdata[[xcoord]] <- obdata_aniscoords$xcoord_val + obdata[[ycoord]] <- obdata_aniscoords$ycoord_val + newdata_aniscoords <- transform_anis(newdata, xcoord, ycoord, + rotate = spcov_params_val[["rotate"]], + scale = spcov_params_val[["scale"]] + ) + newdata[[xcoord]] <- newdata_aniscoords$xcoord_val + newdata[[ycoord]] <- newdata_aniscoords$ycoord_val + } + + formula_newdata <- delete.response(terms(object)) + # fix model frame bug with degree 2 basic polynomial and one prediction row + # e.g. poly(x, y, degree = 2) and newdata has one row + if (any(grepl("nmatrix.", attributes(formula_newdata)$dataClasses, fixed = TRUE)) && NROW(newdata) == 1) { + newdata <- newdata[c(1, 1), , drop = FALSE] + newdata_model_frame <- model.frame(formula_newdata, newdata, drop.unused.levels = FALSE, na.action = na.pass, xlev = object$xlevels) + newdata_model <- model.matrix(formula_newdata, newdata_model_frame, contrasts = object$contrasts) + newdata_model <- newdata_model[1, , drop = FALSE] + # find offset + offset <- model.offset(newdata_model_frame) + if (!is.null(offset)) { + offset <- offset[1] + } + newdata <- newdata[1, , drop = FALSE] + } else { + newdata_model_frame <- model.frame(formula_newdata, newdata, drop.unused.levels = FALSE, na.action = na.pass, xlev = object$xlevels) + # assumes that predicted observations are not outside the factor levels + newdata_model <- model.matrix(formula_newdata, newdata_model_frame, contrasts = object$contrasts) + # find offset + offset <- model.offset(newdata_model_frame) + } + attr_assign <- attr(newdata_model, "assign") + attr_contrasts <- attr(newdata_model, "contrasts") + keep_cols <- which(colnames(newdata_model) %in% colnames(model.matrix(object))) + newdata_model <- newdata_model[, keep_cols, drop = FALSE] + attr(newdata_model, "assign") <- attr_assign[keep_cols] + attr(newdata_model, "contrasts") <- attr_contrasts + + # finding rows w/out NA + ob_predictors <- complete.cases(newdata_model) + if (any(!ob_predictors)) { + stop("Cannot have NA values in predictors.", call. = FALSE) + } + + newdata_model_attr <- attributes(newdata_model) + newdata_model <- matrix(colMeans(newdata_model), nrow = 1) # gives matrix, array class + attr(newdata_model, "assign") <- newdata_model_attr$assign + rownames(newdata_model) <- "1" + colnames(newdata_model) <- newdata_model_attr$dimnames[[2]] + x0 <- newdata_model + betahat <- coef(object) + cov_betahat <- vcov(object) + y <- model.response(model.frame(object)) + offset <- model.offset(model.frame(object)) + # call terms if needed + if (type == "terms") { + return(predict_terms(object, newdata_model, se.fit, scale, df, interval, level, add_newdata_rows, terms, ...)) + } + + + if (interval %in% c("none", "prediction")) { + + # local prediction list + local <- get_local_list_prediction_block(local) + + c0 <- colMeans(covmatrix(object, newdata = newdata, cov_type = "pred.obs")) + Sig <- covmatrix(object) + s0 <- mean(covmatrix(object, newdata = newdata, cov_type = "pred.pred")) + Xmat <- model.matrix(object) + + if (local$method == "all") { + cov_lowchol <- t(Matrix::chol(Matrix::forceSymmetric(Sig))) + } else { + n <- length(c0) + if (local$method == "distance") { + dist_vector <- spdist_vectors(newdata, obdata, xcoord, ycoord, object$dim_coords) + dist_vector <- colMeans(dist_vector) + index <- order(as.numeric(dist_vector))[seq(from = 1, to = min(n, local$size))] + # index <- order(as.numeric(c0))[seq(from = n, to = max(1, n - local$size + 1))] # covariance method for now + } else if (local$method == "covariance") { + index <- order(as.numeric(c0))[seq(from = n, to = max(1, n - local$size + 1))] + } + obdata <- obdata[index, , drop = FALSE] + c0 <- c0[index] + Xmat <- Xmat[index, , drop = FALSE] + y <- y[index] + if (!is.null(offset)) { + offset <- offset[index] + y <- y - offset + } + cov_lowchol <- t(Matrix::chol(Matrix::forceSymmetric(Sig[index, index, drop = FALSE]))) + } + + SqrtSigInv_X <- forwardsolve(cov_lowchol, Xmat) + SqrtSigInv_y <- forwardsolve(cov_lowchol, y) + residuals_pearson <- SqrtSigInv_y - SqrtSigInv_X %*% betahat + SqrtSigInv_c0 <- forwardsolve(cov_lowchol, c0) + + fit <- as.numeric(x0 %*% betahat + Matrix::crossprod(SqrtSigInv_c0, residuals_pearson)) + if (!is.null(offset)) { + fit <- fit + offset + } + + if (se.fit || interval == "prediction") { + H <- x0 - crossprod(SqrtSigInv_c0, SqrtSigInv_X) + vars <- as.numeric(s0 - crossprod(SqrtSigInv_c0, SqrtSigInv_c0) + H %*% tcrossprod(cov_betahat, H)) + se <- sqrt(vars) + if (!is.null(scale)) { + se <- se * scale + df <- df + } else { + df <- Inf + } + if (interval == "prediction") { + tstar <- qt(1 - (1 - level) / 2, df = df) + lwr <- fit - tstar * se + upr <- fit + tstar * se + fit <- cbind(fit, lwr, upr) + row.names(fit) <- "1" + } + if (se.fit) { + return(list(fit = fit, se.fit = se)) + } else { + return(fit) + } + } else { + return(fit) + } + } else if (interval == "confidence") { + # finding fitted values of the mean parameters + fit <- as.numeric(x0 %*% coef(object)) + # apply offset + if (!is.null(offset)) { + fit <- fit + offset + } + vars <- as.numeric(tcrossprod(x0 %*% cov_betahat, x0)) # different from + # predict because x0 is a matrix here, not a vector + se <- sqrt(vars) + if (!is.null(scale)) { + se <- se * scale + df <- df + } else { + df <- Inf + } + tstar <- qt(1 - (1 - level) / 2, df = df) + lwr <- fit - tstar * se + upr <- fit + tstar * se + fit <- cbind(fit, lwr, upr) + row.names(fit) <- "1" + if (se.fit) { + return(list(fit = fit, se.fit = se)) + } else { + return(fit) + } + } else { + stop("Interval must be none, confidence, or prediction") + } +} diff --git a/R/use_gloglik_iid.R b/R/use_gloglik_iid.R index fafa55b..4a5da32 100644 --- a/R/use_gloglik_iid.R +++ b/R/use_gloglik_iid.R @@ -22,12 +22,12 @@ use_gloglik_iid <- function(spcov_initial, estmethod, data_object, dist_matrix_l lmod <- lm(data_object$formula, data = data_object$obdata) sse <- sum(residuals(lmod)^2) - RX <- crossprod(X, X) + Xt_X <- crossprod(X, X) l1 <- 0 # sum of the logs of the identity (all ones) l2 <- sse # l3 <- 2 * sum(log(diag(abs(R)))) - l3 <- sum(log(diag(abs(RX)))) + l3 <- 2 * sum(log(diag(chol(Xt_X)))) if (estmethod == "reml") { minustwologlik <- as.numeric(l1 + (data_object$n - data_object$p) * log(l2) + l3 + (data_object$n - data_object$p) * (1 + log(2 * pi / (data_object$n - data_object$p)))) diff --git a/_pkgdown.yml b/_pkgdown.yml index 8881dd9..958f0a1 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -10,6 +10,7 @@ articles: - articles/guide - articles/SPGLMs - articles/emmeans + - articles/block - articles/technical # toc broken in bootstrap 5, pkgdown 2.1.0 diff --git a/data/fc_borders.rda b/data/fc_borders.rda new file mode 100644 index 0000000..75ea8f9 Binary files /dev/null and b/data/fc_borders.rda differ diff --git a/docs/404.html b/docs/404.html index 4bd27a9..a689570 100644 --- a/docs/404.html +++ b/docs/404.html @@ -39,7 +39,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -67,6 +67,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/LICENSE.html b/docs/LICENSE.html index ad97e9a..b99ee70 100644 --- a/docs/LICENSE.html +++ b/docs/LICENSE.html @@ -17,7 +17,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -43,6 +43,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/articles/SPGLMs.html b/docs/articles/SPGLMs.html index bf849a3..00c0904 100644 --- a/docs/articles/SPGLMs.html +++ b/docs/articles/SPGLMs.html @@ -40,7 +40,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -68,6 +68,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • @@ -1289,8 +1292,8 @@

    References -Cressie, Noel. 1993. Statistics for Spatial Data. John Wiley -& Sons. +Cressie, Noel. 1993. Statistics for Spatial Data (Revised +Edition). Wiley: Hoboken, NJ. https://doi.org/10.1002/9781119115151.
    Faraway, Julian J. 2016. Extending the Linear Model with diff --git a/docs/articles/articles/block.html b/docs/articles/articles/block.html new file mode 100644 index 0000000..8dad9fb --- /dev/null +++ b/docs/articles/articles/block.html @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/docs/articles/block.html b/docs/articles/block.html new file mode 100644 index 0000000..d5cc270 --- /dev/null +++ b/docs/articles/block.html @@ -0,0 +1,370 @@ + + + + + + + +Block Prediction (i.e., Block Kriging) in spmodel • spmodel + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    +
    + + + + +
    +

    Introduction +

    +

    spmodel is a package used to fit, summarize, and predict +for a variety of spatial statistical models. This vignette explores +tools for predicting the average value of a response variable in a +particular geographic region, an approach known as Block Prediction +(i.e., Block Kriging) (Cressie 1993; Ver Hoef +2008). Before proceeding, we load spmodel, +sf, and ggplot2 by running

    + +

    If using spmodel in a formal publication or report, +please cite it. Citing spmodel lets us devote more +resources to the package in the future. We view the spmodel +citation by running

    +
    +citation(package = "spmodel")
    +
    #> To cite spmodel in publications use:
    +#> 
    +#>   Dumelle M, Higham M, Ver Hoef JM (2023). spmodel: Spatial statistical
    +#>   modeling and prediction in R. PLOS ONE 18(3): e0282524.
    +#>   https://doi.org/10.1371/journal.pone.0282524
    +#> 
    +#> A BibTeX entry for LaTeX users is
    +#> 
    +#>   @Article{,
    +#>     title = {{spmodel}: Spatial statistical modeling and prediction in {R}},
    +#>     author = {Michael Dumelle and Matt Higham and Jay M. {Ver Hoef}},
    +#>     journal = {PLOS ONE},
    +#>     year = {2023},
    +#>     volume = {18},
    +#>     number = {3},
    +#>     pages = {1--32},
    +#>     doi = {10.1371/journal.pone.0282524},
    +#>     url = {https://doi.org/10.1371/journal.pone.0282524},
    +#>   }
    +
    +
    +

    Block Prediction (i.e., Block Kriging) in spmodel +

    +

    The sulfate data is an sf object that +contains sulfate measurements in the conterminous United States (CONUS). +We first visualize the distribution of the sulfate data by running

    +
    +ggplot(sulfate, aes(color = sulfate)) +
    +  geom_sf(size = 2.5) +
    +  scale_color_viridis_c() +
    +  theme_gray(base_size = 18)
    +

    +

    In the “Detailed Guide” vignette, we describe how to fit a spatial +linear model to the sulfate data and use it to predict sulfate +concentrations at the geographic locations in +sulfate_preds. But what if we want to predict the average +sulfate concentration in the entire CONUS? Or what if we want to predict +the average sulfate concentration in a single state like Colorado? Or a +region like the Four Corners region of CONUS? We can answer these +questions using Block Prediction (i.e., Block Kriging).

    +

    First, we fit a spatial linear model for sulfate using an +intercept-only model with an exponential spatial covariance function by +running

    +
    +sulfmod <- splm(sulfate ~ 1, sulfate, spcov_type = "exponential")
    +

    We can use this model to perform Block Prediction in any subregion of +CONUS. Consider the four corners region of CONUS, which is composed of +four states: Arizona (AZ), Colorado (CO), New Mexico (NM), and Utah +(UT). fc_borders is a simple features object with polygonal +geometry that contains the boundaries of these four states. We can +visualize the states in this region by running

    +
    +ggplot(data = fc_borders, aes(fill = STUSPS, label = STUSPS)) + 
    +  geom_sf() +
    +  geom_sf_text()
    +

    +

    We subset the sulfate data to the observations from the Four Corners +region so that we can zoom in on their values. We implement a slight +jitter (random perturbation) of the coordinates to avoid overplotting +points that are very close to one another. Moreover, we use a different +color scale than for the full sulfate data so we can better +distinguish among the sulfate concentrations in the region:

    +
    +sulfate_fc <- sulfate[fc_borders, ]
    +sulfate_fc <- st_jitter(sulfate_fc, factor = 0.04)
    +ggplot() + 
    +  geom_sf(data = fc_borders, fill = "transparent") +
    +  geom_sf(data = sulfate_fc, aes(color = sulfate), size = 2) +
    +  scale_color_viridis_c(option = "B", limits = c(0, 8)) +
    +  theme_bw()
    +

    +

    The sulfate concentrations in the western part of the Four Corners +region (UT, AZ) appear lower than in the eastern part (CO, NM).

    +

    Recall that our goal is to predict the mean sulfate concentration in +the entire region. Block Prediction works (in practice) by first +generating a dense grid of prediction locations that cover the entire +geographic region of interest (here, the Four Corners with boundaries +given in fc_borders). We create this grid and turn the +dense grid (of 5,000 points) into an sf object by +running:

    +
    +grid_size <- 5000
    +fc_grid <- st_sample(fc_borders, grid_size, type = "hexagonal")
    +fc_grid <- st_as_sf(fc_grid)
    +

    Depending on the size of the spatial domain, consider increasing or +decreasing the grid size. The larger the grid size, the more accurate +the Block Prediction, though the returns diminish rapidly and add +additional computational burden.

    +

    We visualize the spatial locations (small, black circles) on the grid +with the observed sulfate concentrations overlain:

    +
    +ggplot() + 
    +  geom_sf(data = fc_borders, fill = "transparent") +
    +  geom_sf(data = fc_grid, size = 0.2) +
    +  geom_sf(data = sulfate_fc, aes(color = sulfate), size = 4) +
    +  scale_color_viridis_c(option = "B", limits = c(0, 8)) +
    +  theme_bw()
    +

    +

    Intuitively, Block Prediction works by making point predictions at +each point in the dense grid and then combining each prediction in a way +that respects the uncertainties associated with the fitted spatial +model. We obtain predictions at each point in the dense grid using the +sulfmod model that was fit to all of the sulfate data in +the conterminous United States by running

    +
    +fc_grid$preds <- predict(sulfmod, newdata = fc_grid)
    +

    We visualize these point predictions by running

    +
    +ggplot() + 
    +  geom_sf(data = fc_borders, fill = "transparent") +
    +  geom_sf(data = fc_grid, aes(color = preds), size = 0.2) +
    +  geom_sf(data = sulfate_fc, aes(color = sulfate), size = 4) +
    +  scale_color_viridis_c(option = "B", limits = c(0, 8)) +
    +  theme_bw()
    +

    +

    We can combine the point predictions via Block Prediction by adding a +block = TRUE argument to predict():

    +
    +predict(sulfmod, newdata = fc_grid, block = TRUE)
    +
    #> [1] 3.487391
    +

    Just like with predict.splm(), we can obtain appropriate +prediction uncertainty using the interval argument:

    +
    +predict(sulfmod, newdata = fc_grid, block = TRUE, interval = "prediction")
    +
    #>        fit      lwr      upr
    +#> 1 3.487391 2.062777 4.912005
    +

    Now suppose that we wanted to predict average sulfate concentrations +in Colorado. First, we subset the Four Corners region to Colorado:

    +
    +co_borders <- fc_borders[fc_borders$STUSPS == "CO", ]
    +

    Then, we subset the dense grid to Colordao:

    +
    +co_grid <- fc_grid[co_borders, ]
    +

    Finally, we use Block Prediction:

    +
    +predict(sulfmod, co_grid, block = TRUE, interval = "prediction")
    +
    #>        fit      lwr      upr
    +#> 1 3.954228 2.204428 5.704028
    +

    Alternatively, one could create a new grid for Colordao rather than +subsetting the Four Corners grid.

    +

    We write a helper function that returns the Block Prediction mean +sulfate concentration for each of the four states (by subsetting the +original grid):

    +
    +return_state_mean <- function(state) {
    +  state_borders <- fc_borders[fc_borders$STUSPS == state, ]
    +  state_grid <- fc_grid[state_borders, ]
    +  predict(sulfmod, state_grid, block = TRUE)
    +}
    +fc_borders$mean <- vapply(fc_borders$STUSPS, return_state_mean, numeric(1))
    +

    We can visualize each Block Prediction of mean sulfate +concentration:

    +
    +ggplot() + 
    +  geom_sf(data = fc_borders, aes(fill = mean)) +
    +  scale_fill_viridis_c(option = "B", limits = c(0, 8)) +
    +  theme_bw()
    +

    +
    +
    +

    Some Details +

    +

    A few details:

    +
      +
    • If the spatial model has explanatory variables, each element in +the dense Block Prediction grid must have values of those explanatory +variables. One way to accomplish this, for example, is to leverage a +raster whose layers contain relevant explanatory variables.

    • +
    • There is a nuanced difference between Block Prediction and the +fixed effect coefficients estimated by the model. Block Prediction +predicts the realized mean, while the fixed effect coefficients are +estimates of the underlying process mean. For more, see Dumelle et al. (2022).

    • +
    • Block Prediction is only available for point-referenced spatial +linear models (i.e., models fit via splm()). Block +Prediction is not available for areal data sets (e.g., +spgautor()) or spatial generalized linear models (e.g., +spglm(), spgautor()). Like +predict.splm(), predict.splm(block = TRUE) has +a local argument for large data sets. See +?predict.splm() for more.

    • +
    +
    +
    +

    References +

    +
    +
    +Cressie, Noel. 1993. Statistics for Spatial Data (Revised +Edition). Wiley: Hoboken, NJ. https://doi.org/10.1002/9781119115151. +
    +
    +Dumelle, Michael, Matt Higham, Jay M Ver Hoef, Anthony R Olsen, and Lisa +Madsen. 2022. “A Comparison of Design-Based and Model-Based +Approaches for Finite Population Spatial Sampling and Inference.” +Methods in Ecology and Evolution 13 (9): 2018–29. +
    +
    +Ver Hoef, Jay M. 2008. “Spatial Methods for Plot-Based Sampling of +Wildlife Populations.” Environmental and Ecological +Statistics 15: 3–13. +
    +
    +
    +
    + + + +
    + + + +
    + +
    +

    +

    Site built with pkgdown 2.0.9.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/articles/block_files/figure-html/unnamed-chunk-10-1.png b/docs/articles/block_files/figure-html/unnamed-chunk-10-1.png new file mode 100644 index 0000000..d2d7976 Binary files /dev/null and b/docs/articles/block_files/figure-html/unnamed-chunk-10-1.png differ diff --git a/docs/articles/block_files/figure-html/unnamed-chunk-17-1.png b/docs/articles/block_files/figure-html/unnamed-chunk-17-1.png new file mode 100644 index 0000000..57e7e5b Binary files /dev/null and b/docs/articles/block_files/figure-html/unnamed-chunk-17-1.png differ diff --git a/docs/articles/block_files/figure-html/unnamed-chunk-3-1.png b/docs/articles/block_files/figure-html/unnamed-chunk-3-1.png new file mode 100644 index 0000000..1de9f03 Binary files /dev/null and b/docs/articles/block_files/figure-html/unnamed-chunk-3-1.png differ diff --git a/docs/articles/block_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/block_files/figure-html/unnamed-chunk-5-1.png new file mode 100644 index 0000000..c43fd3b Binary files /dev/null and b/docs/articles/block_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/articles/block_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/block_files/figure-html/unnamed-chunk-6-1.png new file mode 100644 index 0000000..1617d29 Binary files /dev/null and b/docs/articles/block_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/docs/articles/block_files/figure-html/unnamed-chunk-8-1.png b/docs/articles/block_files/figure-html/unnamed-chunk-8-1.png new file mode 100644 index 0000000..6e08430 Binary files /dev/null and b/docs/articles/block_files/figure-html/unnamed-chunk-8-1.png differ diff --git a/docs/articles/emmeans.html b/docs/articles/emmeans.html index acd277c..6df3d67 100644 --- a/docs/articles/emmeans.html +++ b/docs/articles/emmeans.html @@ -40,7 +40,7 @@ spmodel - 0.10.0 + 0.11.0
    @@ -68,6 +68,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • @@ -183,10 +186,10 @@

    Applying emmeans to spmodel Objects distribution of log conductivity in lake by state and year by running

    -ggplot(lake, aes(color = log_cond)) +
    -  geom_sf() +
    -  scale_color_viridis_c() +
    -  theme_gray(base_size = 14)
    +ggplot(lake, aes(color = log_cond)) + + geom_sf() + + scale_color_viridis_c() + + theme_gray(base_size = 14)
    Distribution of log conductivity in the lake data.

    Distribution of log conductivity in the lake data. @@ -621,7 +624,7 @@

    A Multi-Factor Numeric M #> AZ 3.71 0.647 Inf 2.45 4.98 #> CO 5.69 0.312 Inf 5.08 6.30 #> NV 5.39 0.352 Inf 4.70 6.08 -#> UT 6.17 1.450 Inf 3.33 9.02 +#> UT 6.17 1.452 Inf 3.33 9.02 #> #> Degrees-of-freedom method: asymptotic #> Confidence level used: 0.95 @@ -646,7 +649,7 @@

    A Multi-Factor Numeric M #> temp = 7.63, state = UT: #> year emmean SE df asymp.LCL asymp.UCL #> 2012 6.06 0.144 Inf 5.78 6.34 -#> 2017 6.17 1.450 Inf 3.33 9.02 +#> 2017 6.17 1.452 Inf 3.33 9.02 #> #> Degrees-of-freedom method: asymptotic #> Confidence level used: 0.95 @@ -661,28 +664,28 @@

    A Multi-Factor Numeric M #> AZ year2012 - AZ year2017 1.2122 0.716 Inf 1.694 0.6915 #> AZ year2012 - CO year2017 -0.7639 0.461 Inf -1.658 0.7147 #> AZ year2012 - NV year2017 -0.4664 0.489 Inf -0.954 0.9805 -#> AZ year2012 - UT year2017 -1.2457 1.490 Inf -0.835 0.9911 +#> AZ year2012 - UT year2017 -1.2457 1.491 Inf -0.835 0.9911 #> CO year2012 - NV year2012 -0.0557 0.348 Inf -0.160 1.0000 #> CO year2012 - UT year2012 -0.3529 0.283 Inf -1.249 0.9170 #> CO year2012 - AZ year2017 1.9916 0.691 Inf 2.882 0.0762 #> CO year2012 - CO year2017 0.0155 0.367 Inf 0.042 1.0000 #> CO year2012 - NV year2017 0.3130 0.428 Inf 0.732 0.9961 -#> CO year2012 - UT year2017 -0.4664 1.470 Inf -0.317 1.0000 +#> CO year2012 - UT year2017 -0.4664 1.472 Inf -0.317 1.0000 #> NV year2012 - UT year2012 -0.2972 0.288 Inf -1.031 0.9699 #> NV year2012 - AZ year2017 2.0473 0.694 Inf 2.952 0.0627 #> NV year2012 - CO year2017 0.0712 0.400 Inf 0.178 1.0000 #> NV year2012 - NV year2017 0.3687 0.420 Inf 0.878 0.9880 -#> NV year2012 - UT year2017 -0.4107 1.470 Inf -0.279 1.0000 +#> NV year2012 - UT year2017 -0.4107 1.473 Inf -0.279 1.0000 #> UT year2012 - AZ year2017 2.3445 0.663 Inf 3.537 0.0096 #> UT year2012 - CO year2017 0.3684 0.344 Inf 1.072 0.9626 #> UT year2012 - NV year2017 0.6659 0.380 Inf 1.751 0.6535 -#> UT year2012 - UT year2017 -0.1134 1.460 Inf -0.078 1.0000 +#> UT year2012 - UT year2017 -0.1134 1.455 Inf -0.078 1.0000 #> AZ year2017 - CO year2017 -1.9761 0.718 Inf -2.751 0.1076 #> AZ year2017 - NV year2017 -1.6786 0.736 Inf -2.279 0.3050 #> AZ year2017 - UT year2017 -2.4580 1.590 Inf -1.546 0.7823 #> CO year2017 - NV year2017 0.2975 0.470 Inf 0.633 0.9984 -#> CO year2017 - UT year2017 -0.4819 1.490 Inf -0.324 1.0000 -#> NV year2017 - UT year2017 -0.7793 1.490 Inf -0.522 0.9996 +#> CO year2017 - UT year2017 -0.4819 1.485 Inf -0.324 1.0000 +#> NV year2017 - UT year2017 -0.7793 1.494 Inf -0.522 0.9996 #> #> Degrees-of-freedom method: asymptotic #> P value adjustment: tukey method for comparing a family of 8 estimates @@ -693,7 +696,7 @@

    A Multi-Factor Numeric M #> state temp.trend SE df asymp.LCL asymp.UCL #> AZ 0.145 0.0358 Inf 0.0748 0.215 #> CO 0.269 0.0482 Inf 0.1747 0.364 -#> NV 0.168 0.1030 Inf -0.0338 0.369 +#> NV 0.168 0.1029 Inf -0.0338 0.369 #> UT 0.325 0.0390 Inf 0.2486 0.401 #> #> year = 2017: @@ -701,7 +704,7 @@

    A Multi-Factor Numeric M #> AZ 0.225 0.0656 Inf 0.0962 0.354 #> CO 0.293 0.0704 Inf 0.1546 0.431 #> NV 0.178 0.0562 Inf 0.0682 0.288 -#> UT 0.362 0.2440 Inf -0.1159 0.840 +#> UT 0.362 0.2438 Inf -0.1159 0.840 #> #> Degrees-of-freedom method: asymptotic #> Confidence level used: 0.95 @@ -720,13 +723,13 @@

    A Multi-Factor Numeric M #> #> state = NV: #> year temp.trend SE df asymp.LCL asymp.UCL -#> 2012 0.168 0.1030 Inf -0.0338 0.369 +#> 2012 0.168 0.1029 Inf -0.0338 0.369 #> 2017 0.178 0.0562 Inf 0.0682 0.288 #> #> state = UT: #> year temp.trend SE df asymp.LCL asymp.UCL #> 2012 0.325 0.0390 Inf 0.2486 0.401 -#> 2017 0.362 0.2440 Inf -0.1159 0.840 +#> 2017 0.362 0.2438 Inf -0.1159 0.840 #> #> Degrees-of-freedom method: asymptotic #> Confidence level used: 0.95 @@ -736,12 +739,12 @@

    A Multi-Factor Numeric M
    #>  state year temp.trend     SE  df asymp.LCL asymp.UCL
     #>  AZ    2012      0.145 0.0358 Inf    0.0748     0.215
     #>  CO    2012      0.269 0.0482 Inf    0.1747     0.364
    -#>  NV    2012      0.168 0.1030 Inf   -0.0338     0.369
    +#>  NV    2012      0.168 0.1029 Inf   -0.0338     0.369
     #>  UT    2012      0.325 0.0390 Inf    0.2486     0.401
     #>  AZ    2017      0.225 0.0656 Inf    0.0962     0.354
     #>  CO    2017      0.293 0.0704 Inf    0.1546     0.431
     #>  NV    2017      0.178 0.0562 Inf    0.0682     0.288
    -#>  UT    2017      0.362 0.2440 Inf   -0.1159     0.840
    +#>  UT    2017      0.362 0.2438 Inf   -0.1159     0.840
     #> 
     #> Degrees-of-freedom method: asymptotic 
     #> Confidence level used: 0.95
    diff --git a/docs/articles/guide.html b/docs/articles/guide.html index b1264aa..11790f4 100644 --- a/docs/articles/guide.html +++ b/docs/articles/guide.html @@ -40,7 +40,7 @@ spmodel - 0.10.0 + 0.11.0

    @@ -68,6 +68,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • @@ -513,12 +516,12 @@

    Model-Fit StatisticsAIC(spmod, lmod)
    #>       df      AIC
     #> spmod  3 373.2089
    -#> lmod   1 636.0635
    +#> lmod 1 633.6418
     AICc(spmod, lmod)
    #>       df     AICc
     #> spmod  3 373.2754
    -#> lmod   1 636.0745
    +#> lmod 1 633.6528

    The noticeably lower AIC and AICc of the spatial model indicate that it is a better fit to the data than the non-spatial model. Recall that these AIC and AICc comparisons are valid because both models are fit @@ -704,7 +707,7 @@

    The broom functions: #> model n p npar value AIC AICc BIC logLik deviance #> <chr> <int> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 spmod 365 2 3 367. 373. 373. 385. -184. 363. -#> 2 lmod 365 2 1 634. 636. 636. 640. -317. 363. +#> 2 lmod 365 2 1 632. 634. 634. 638. -316. 363. #> # ℹ 1 more variable: pseudo.r.squared <dbl>

    Finally, the augment() function augments the original data with model diagnostics:

    @@ -2059,7 +2062,8 @@

    References -———. 1993. Statistics for Spatial Data. John Wiley & Sons. +———. 1993. Statistics for Spatial Data (Revised Edition). +Wiley: Hoboken, NJ. https://doi.org/10.1002/9781119115151.
    Curriero, Frank C, and Subhash Lele. 1999. “A Composite Likelihood diff --git a/docs/articles/index.html b/docs/articles/index.html index cba8181..6b3b93d 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -17,7 +17,7 @@ spmodel - 0.10.0 + 0.11.0
    @@ -43,6 +43,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • @@ -80,6 +83,8 @@

    Articles

    Using emmeans to Estimate Marginal Means of spmodel Objects
    +
    Block Prediction (i.e., Block Kriging) in spmodel
    +
    Technical Details
    diff --git a/docs/articles/introduction.html b/docs/articles/introduction.html index 878bf4b..f3ffb90 100644 --- a/docs/articles/introduction.html +++ b/docs/articles/introduction.html @@ -40,7 +40,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -68,6 +68,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • @@ -436,7 +439,7 @@

    Model Summaries#> model n p npar value AIC AICc BIC logLik deviance #> <chr> <int> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 spmod 365 2 3 367. 373. 373. 385. -184. 363. -#> 2 lmod 365 2 1 634. 636. 636. 640. -317. 363. +#> 2 lmod 365 2 1 632. 634. 634. 638. -316. 363. #> # ℹ 1 more variable: pseudo.r.squared <dbl>

    The much lower AIC and AICc for the spatial linear model indicates it is a much better fit to the data. Outside of glance() and diff --git a/docs/articles/technical.html b/docs/articles/technical.html index 4e563da..5eb4914 100644 --- a/docs/articles/technical.html +++ b/docs/articles/technical.html @@ -40,7 +40,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -68,6 +68,9 @@

  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • @@ -996,6 +999,87 @@

    Big Data
    +

    Block Prediction +

    +

    Rather than making predictions at point-referenced locations, Block +Prediction (i.e., Block Kriging) is a technique used to predict an +average (or total) in some region (i.e., spatial domain). When +interval = "none" or interval = "prediction", +the (empirical) Block Prediction (BP) is given by \[\begin{equation}\label{eq:bp_pred} + \mathbf{\dot{y}}_B = \mathbf{X}_{B} \hat{\boldsymbol{\beta}} + +\hat{\boldsymbol{\Sigma}}_{B} \hat{\boldsymbol{\Sigma}}^{-1}_{o} +(\mathbf{y}_o - \mathbf{X}_o \hat{\boldsymbol{\beta}}). +\end{equation}\] The quantity \(\mathbf{X}_{B} = [\mathbf{x}_{1, B}, +\mathbf{x}_{2, B}, ... , \mathbf{x}_{k, B}]^\top\) where \(j = 1, 2, ..., k\) indexes the columns of +\(\mathbf{X}_{B}\) and \(\mathbf{x}_{j, B} = \frac{1}{|B|}\int_B +\mathbf{x}_j d\mathbf{s}\) for the \(\mathbf{s}\) points in the region. The +total area (volume) of the region is \(|B|\) . The quantity \(\hat{\boldsymbol{\Sigma}}_{B} = +[\hat{\boldsymbol{\Sigma}}_1, \hat{\boldsymbol{\Sigma}}_2, ... , +\hat{\boldsymbol{\Sigma}}_n]^\top\) where \(i = 1, 2, ... , n\) indexes each element in +\(\mathbf{y}_o\) and \(\hat{\boldsymbol{\Sigma}}_i = \frac{1}{|B|}\int_B +\text{Cov}(\mathbf{y}_B, \text{y}_i) d\mathbf{s}\). The quantity +\(\text{Cov}(\mathbf{y}_B, +\text{y}_i)\) represents the covariance between \(\text{y}_i\) and all other points in the +region. In practice, the Block Prediction integrals are approximated +using summation on a fine grid of \(G\) +points, similar to other numerical integration techniques. That is, +\(\mathbf{x}_{j, B} \approx +\frac{1}{|B|}\sum_{g = 1}^G \mathbf{x}_g\) and similarly for +\(\hat{\boldsymbol{\Sigma}}_i\), where +\(g\) indexes the points on the fine +grid. Intuitively, these summations approximate average values in the +entire region.

    +

    When interval = "prediction", the (100 \(\times\) level)% prediction +interval for \(\mathbf{\dot{y}}_B\) is +\(\mathbf{\dot{y}}_B \pm z^* +\sqrt{\sigma^2_B}\), where \(\sigma^2_B += \sigma^{2*}_B - \hat{\boldsymbol{\Sigma}}_{B} +\hat{\boldsymbol{\Sigma}}^{-1}_{o}\hat{\boldsymbol{\Sigma}}_{B}^\top + +\mathbf{Q}_B (\mathbf{X}_o^\top \hat{\boldsymbol{\Sigma}}_o^{-1} +\mathbf{X}_o)^{-1} \mathbf{Q}_B^\top\). The quantity \(\sigma^{2*}_B = \frac{1}{|B|^2}\int_B \int_B +\text{Cov}(\text{y}_\mathbf{s}, \text{y}_\mathbf{u}) +d\mathbf{s}d\mathbf{u}\), where \(\mathbf{s}\) and \(\mathbf{u}\) represent points in the region +(the product of \(\mathbf{s}\) and +\(\mathbf{u}\) contains all possible +pairs of points in the region). Intuitively, \(\sigma^{2*}_B\) approximates the average +covariance between any two points in the region though summation over +the fine grid. That is, \(\sigma^{2*}_B +\approx \frac{1}{|B|^2}\sum_{g_i = 1}^G \sum_{g_j = 1}^G +\text{Cov}(\text{y}_{g_i}, \text{y}_{g_j})\). The quantity \(\mathbf{Q}_B = \mathbf{X}_B - +\hat{\boldsymbol{\Sigma}}_B \hat{\boldsymbol{\Sigma}}^{-1}_o +\mathbf{X}_o\).

    +

    When interval = "confidence", the average process mean +(i.e., not the realized mean) and uncertainties are returned from the +underlying model. The (process) mean estimate is \(\mathbf{X}_{B} \hat{\boldsymbol{\beta}}\) +and a (100 \(\times\) +level)% confidence interval is \(\mathbf{X}_{B} \hat{\boldsymbol{\beta}} \pm z^* +\sqrt{\mathbf{X}_{B} (\mathbf{X}^\top_o \hat{\boldsymbol{\Sigma}}_o^{-1} +\mathbf{X}_o)^{-1} \mathbf{X}_{B}^\top}\).

    +

    For Big Data, when local = TRUE, the same approach is +applied as for point prediction but adjusted slightly to accommodate the +averaging necessary for Block Prediction. Thus, when +method = "covariance" (the default), the size +observations having the highest average covariance with elements of +\(\mathbf{y}_o\) are used to find the +subsets \(\check{\mathbf{X}}_o\), \(\check{\boldsymbol{\Sigma}}_o\), and \(\check{\mathbf{y}}_o\). When +method = "distance", the size observations +having the smallest average distance from elements of \(\mathbf{y}_o\) are used to find the subsets +\(\check{\mathbf{X}}_o\), \(\check{\boldsymbol{\Sigma}}_o\), and \(\check{\mathbf{y}}_o\). Recall that these +two methods are equivalent for processes without anisotropy, random +effects, or partition factors, but can differ otherwise. The default is +size = 1000, which is much larger than for point +prediction. This is because for Block Prediction, the Cholesky +decomposition of \(\check{\boldsymbol{\Sigma}}_o\) needs to +only be computed once (rather than separately for each \(\check{\boldsymbol{\Sigma}}_o\) associated +with each prediction location, as for point prediction).

    +

    Currently, the fine grid used to obtain Block Predictions is supplied +by the user via newdata. For an overview of Block +Prediction, see Cressie (1993). For +applications to a finite population (i.e., a region with a finite number +of point locations), see Ver Hoef (2008) +and Dumelle et al. (2022).

    +
    +
    Cribari-Neto, Francisco, and Achim Zeileis. 2010. “Beta Regression @@ -3562,6 +3647,12 @@

    References +Dumelle, Michael, Matt Higham, Jay M Ver Hoef, Anthony R Olsen, and Lisa +Madsen. 2022. “A Comparison of Design-Based and Model-Based +Approaches for Finite Population Spatial Sampling and Inference.” +Methods in Ecology and Evolution 13 (9): 2018–29. +

    Espinheira, Patrícia L, Silvia LP Ferrari, and Francisco Cribari-Neto. 2008. “On Beta Regression Residuals.” Journal of @@ -3760,9 +3851,14 @@

    Referenceshttp://www.stats.ox.ac.uk/pub/MASS4/.

    +
    +Ver Hoef, Jay M. 2008. “Spatial Methods for Plot-Based Sampling of +Wildlife Populations.” Environmental and Ecological +Statistics 15: 3–13. +
    -Ver Hoef, Jay M. 2012. “Who Invented the Delta Method?” -The American Statistician 66 (2): 124–27. +———. 2012. “Who Invented the Delta Method?” The +American Statistician 66 (2): 124–27.
    ———. 2018. “Kriging Models for Linear Networks and Non-Euclidean diff --git a/docs/authors.html b/docs/authors.html index d6b0785..bf5c972 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -17,7 +17,7 @@ spmodel - 0.10.0 + 0.11.0
    @@ -43,6 +43,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/index.html b/docs/index.html index 0b8687a..580ff9e 100644 --- a/docs/index.html +++ b/docs/index.html @@ -40,7 +40,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -68,6 +68,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/news/index.html b/docs/news/index.html index 028affd..1b7ebc1 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -17,7 +17,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -43,6 +43,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • @@ -70,9 +73,23 @@

    Changelog

    - + +
    +

    Major Updates

    +
    • Added support for Block Prediction (i.e., Block Kriging) to estimate averages (and their uncertainties) over a geographic region.
    • +
    • Added a vignette to the spmodel website titled “Block Prediction (i.e., Block Kriging) in spmodel”.
    • +
    • Added a fc_borders data set which contains borders for the Four Corners states in the United States.
    • +
    +
    +

    Bug Fixes

    +
    • Fixed a bug that incorrectly calculated the log determinant of the fixed effects in the restricted log likelihood.
    • +
    • Fixed a bug that caused an error when covmatrix(..., cov_type = "pred.pred") was called on object with a non-NULL newdata element.
    • +
    +
    +
    +
    -

    Minor updates

    +

    Minor Updates

    • Added a robust semivariogram option to esv(); see the robustargument to esv() (#28).
    • Added "none" and "ie" spatial covariance types (via spcov_type or spcov_initial) to spautor(), spgautor(), and spautorRF() (#27).
    • Fixed the aspect ratio at one for level curve plots of fitted model objects (anisotropic and isotropic).
    • diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 2298bcf..815250a 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -2,12 +2,13 @@ pandoc: 3.1.11 pkgdown: 2.0.9 pkgdown_sha: ~ articles: + block: block.html emmeans: emmeans.html guide: guide.html introduction: introduction.html SPGLMs: SPGLMs.html technical: technical.html -last_built: 2025-03-12T23:35Z +last_built: 2025-07-03T16:09Z urls: reference: https://usepa.github.io/spmodel/reference article: https://usepa.github.io/spmodel/articles diff --git a/docs/reference/AICc.html b/docs/reference/AICc.html index cd734fe..0d81337 100644 --- a/docs/reference/AICc.html +++ b/docs/reference/AICc.html @@ -19,7 +19,7 @@ spmodel - 0.10.0 + 0.11.0
    @@ -45,6 +45,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/AUROC.html b/docs/reference/AUROC.html index e5eece9..9cc4484 100644 --- a/docs/reference/AUROC.html +++ b/docs/reference/AUROC.html @@ -19,7 +19,7 @@ spmodel - 0.10.0 + 0.11.0
    @@ -45,6 +45,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/anova.spmodel.html b/docs/reference/anova.spmodel.html index f0de40f..0987a48 100644 --- a/docs/reference/anova.spmodel.html +++ b/docs/reference/anova.spmodel.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • @@ -231,7 +234,7 @@

    Examples

    #> # A tibble: 1 × 5 #> full reduced df statistic p.value #> <chr> <chr> <int> <dbl> <dbl> -#> 1 spmod lmod 2 4.17 0.124 +#> 1 spmod lmod 2 2.38 0.304 diff --git a/docs/reference/augment.spmodel.html b/docs/reference/augment.spmodel.html index 5f60a7f..46f3df5 100644 --- a/docs/reference/augment.spmodel.html +++ b/docs/reference/augment.spmodel.html @@ -27,7 +27,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -53,6 +53,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/caribou.html b/docs/reference/caribou.html index a35e44c..919fe1b 100644 --- a/docs/reference/caribou.html +++ b/docs/reference/caribou.html @@ -17,7 +17,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -43,6 +43,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/coef.spmodel.html b/docs/reference/coef.spmodel.html index be37a8a..433b9a7 100644 --- a/docs/reference/coef.spmodel.html +++ b/docs/reference/coef.spmodel.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/confint.spmodel.html b/docs/reference/confint.spmodel.html index 59763c9..1df91bf 100644 --- a/docs/reference/confint.spmodel.html +++ b/docs/reference/confint.spmodel.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/cooks.distance.spmodel.html b/docs/reference/cooks.distance.spmodel.html index 8fe3cfe..fb41f07 100644 --- a/docs/reference/cooks.distance.spmodel.html +++ b/docs/reference/cooks.distance.spmodel.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/covmatrix.html b/docs/reference/covmatrix.html index 7de0c10..3af4f8d 100644 --- a/docs/reference/covmatrix.html +++ b/docs/reference/covmatrix.html @@ -17,7 +17,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -43,6 +43,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/deviance.spmodel.html b/docs/reference/deviance.spmodel.html index ce787ac..d9a9337 100644 --- a/docs/reference/deviance.spmodel.html +++ b/docs/reference/deviance.spmodel.html @@ -17,7 +17,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -43,6 +43,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/dispersion_initial.html b/docs/reference/dispersion_initial.html index 7497df6..d866fee 100644 --- a/docs/reference/dispersion_initial.html +++ b/docs/reference/dispersion_initial.html @@ -19,7 +19,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -45,6 +45,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/dispersion_params.html b/docs/reference/dispersion_params.html index 9ab938e..eedae59 100644 --- a/docs/reference/dispersion_params.html +++ b/docs/reference/dispersion_params.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/esv.html b/docs/reference/esv.html index 3456e5a..0bbbfb5 100644 --- a/docs/reference/esv.html +++ b/docs/reference/esv.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/fc_borders.html b/docs/reference/fc_borders.html new file mode 100644 index 0000000..feddffe --- /dev/null +++ b/docs/reference/fc_borders.html @@ -0,0 +1,123 @@ + +Four Corners State Borders — fc_borders • spmodel + + +
    +
    + + + +
    +
    + + +
    +

    State borders for the four corners states in the United States: +Arizona, Colorado, New Mexico, Utah.

    +
    + +
    +
    fc_borders
    +
    + +
    +

    Format

    +

    An sf object with 4 rows and 4 columns:

    +
    • NAME: State name.

    • +
    • STUSPS: State postal code.

    • +
    • GEO.ID: State GEO.ID from the United States Census Bureau.

    • +
    • geometry: POINT geometry representing coordinates in a NAD83 +projection (EPSG: 5070). Distances between points are in meters.

    • +
    +
    +

    Source

    +

    The data source is the United States Census Bureau TIGER/Line Shapefiles.

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.9.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/fitted.spmodel.html b/docs/reference/fitted.spmodel.html index f04a88b..c39bee7 100644 --- a/docs/reference/fitted.spmodel.html +++ b/docs/reference/fitted.spmodel.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/formula.spmodel.html b/docs/reference/formula.spmodel.html index 7052847..0873ebb 100644 --- a/docs/reference/formula.spmodel.html +++ b/docs/reference/formula.spmodel.html @@ -17,7 +17,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -43,6 +43,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • @@ -113,7 +116,7 @@

    Examples

    ) formula(spmod) #> z ~ water + tarp -#> <environment: 0x000002e79fd1b338> +#> <environment: 0x000001775f5ba208> diff --git a/docs/reference/glance.spmodel.html b/docs/reference/glance.spmodel.html index 855409b..99a2ecb 100644 --- a/docs/reference/glance.spmodel.html +++ b/docs/reference/glance.spmodel.html @@ -21,7 +21,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -47,6 +47,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/glances.html b/docs/reference/glances.html index cacf4a5..ee4d80b 100644 --- a/docs/reference/glances.html +++ b/docs/reference/glances.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • @@ -150,17 +153,17 @@

    Examples

    ) glances(lmod, spmod) #> # A tibble: 2 × 11 -#> model n p npar value AIC AICc BIC logLik deviance -#> <chr> <int> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> -#> 1 lmod 30 4 1 -1.68 0.323 0.466 1.72 0.838 26 -#> 2 spmod 30 4 3 -5.85 0.150 1.07 4.35 2.92 26 +#> model n p npar value AIC AICc BIC logLik deviance +#> <chr> <int> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> +#> 1 lmod 30 4 1 -3.47 -1.47 -1.33 -0.0674 1.73 26 +#> 2 spmod 30 4 3 -5.85 0.150 1.07 4.35 2.92 26 #> # ℹ 1 more variable: pseudo.r.squared <dbl> glances(lmod, spmod, sort_by = "logLik", decreasing = TRUE) #> # A tibble: 2 × 11 -#> model n p npar value AIC AICc BIC logLik deviance -#> <chr> <int> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> -#> 1 spmod 30 4 3 -5.85 0.150 1.07 4.35 2.92 26 -#> 2 lmod 30 4 1 -1.68 0.323 0.466 1.72 0.838 26 +#> model n p npar value AIC AICc BIC logLik deviance +#> <chr> <int> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> +#> 1 spmod 30 4 3 -5.85 0.150 1.07 4.35 2.92 26 +#> 2 lmod 30 4 1 -3.47 -1.47 -1.33 -0.0674 1.73 26 #> # ℹ 1 more variable: pseudo.r.squared <dbl> diff --git a/docs/reference/hatvalues.spmodel.html b/docs/reference/hatvalues.spmodel.html index 3066d00..31a4236 100644 --- a/docs/reference/hatvalues.spmodel.html +++ b/docs/reference/hatvalues.spmodel.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/index.html b/docs/reference/index.html index 0e0690f..b5917af 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -17,7 +17,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -43,6 +43,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • @@ -124,6 +127,10 @@

    All functions esv() plot(<esv>)

    Compute the empirical semivariogram

    + +

    fc_borders

    + +

    Four Corners State Borders

    fitted(<splm>) fitted.values(<splm>) fitted(<spautor>) fitted.values(<spautor>) fitted(<spglm>) fitted.values(<spglm>) fitted(<spgautor>) fitted.values(<spgautor>)

    diff --git a/docs/reference/influence.spmodel.html b/docs/reference/influence.spmodel.html index 36fb062..c8f8238 100644 --- a/docs/reference/influence.spmodel.html +++ b/docs/reference/influence.spmodel.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/labels.spmodel.html b/docs/reference/labels.spmodel.html index cdea7eb..01d6f37 100644 --- a/docs/reference/labels.spmodel.html +++ b/docs/reference/labels.spmodel.html @@ -17,7 +17,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -43,6 +43,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/lake.html b/docs/reference/lake.html index e16dcac..fe32513 100644 --- a/docs/reference/lake.html +++ b/docs/reference/lake.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/lake_preds.html b/docs/reference/lake_preds.html index a3c161e..593df8e 100644 --- a/docs/reference/lake_preds.html +++ b/docs/reference/lake_preds.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/logLik.spmodel.html b/docs/reference/logLik.spmodel.html index a94d944..c496be9 100644 --- a/docs/reference/logLik.spmodel.html +++ b/docs/reference/logLik.spmodel.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/loocv.html b/docs/reference/loocv.html index 5e6dced..0bc1461 100644 --- a/docs/reference/loocv.html +++ b/docs/reference/loocv.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/model.frame.spmodel.html b/docs/reference/model.frame.spmodel.html index fb4ea3b..f07ddac 100644 --- a/docs/reference/model.frame.spmodel.html +++ b/docs/reference/model.frame.spmodel.html @@ -17,7 +17,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -43,6 +43,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/model.matrix.spmodel.html b/docs/reference/model.matrix.spmodel.html index 7102c23..20f7fa6 100644 --- a/docs/reference/model.matrix.spmodel.html +++ b/docs/reference/model.matrix.spmodel.html @@ -17,7 +17,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -43,6 +43,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/moose.html b/docs/reference/moose.html index 83bc48c..ecb063c 100644 --- a/docs/reference/moose.html +++ b/docs/reference/moose.html @@ -17,7 +17,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -43,6 +43,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/moose_preds.html b/docs/reference/moose_preds.html index 4589b5f..a584998 100644 --- a/docs/reference/moose_preds.html +++ b/docs/reference/moose_preds.html @@ -17,7 +17,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -43,6 +43,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/moss.html b/docs/reference/moss.html index 8f198b9..e4ebcc0 100644 --- a/docs/reference/moss.html +++ b/docs/reference/moss.html @@ -17,7 +17,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -43,6 +43,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/plot.spmodel.html b/docs/reference/plot.spmodel.html index db33b2c..9d47105 100644 --- a/docs/reference/plot.spmodel.html +++ b/docs/reference/plot.spmodel.html @@ -20,7 +20,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -46,6 +46,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/predict.spmodel.html b/docs/reference/predict.spmodel.html index f5271c2..0a0e810 100644 --- a/docs/reference/predict.spmodel.html +++ b/docs/reference/predict.spmodel.html @@ -17,7 +17,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -43,6 +43,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • @@ -85,6 +88,7 @@

    Model predictions (Kriging)

    interval = c("none", "confidence", "prediction"), level = 0.95, type = c("response", "terms"), + block = FALSE, local, terms = NULL, na.action = na.fail, @@ -275,6 +279,14 @@

    Arguments

    spglm() or spgautor() model objects), or terms scale.

    +
    block
    +

    A logical indicating whether a block prediction over the entire region +in newdata should be returned. When block is TRUE, +newdata should be a dense grid of prediction locations that span +the entire region. The default is FALSE, which +returns point predictions for each location on newdata.

    + +
    local

    A optional logical or list controlling the big data approximation. If omitted, local is set to TRUE or FALSE based on the observed data sample size (i.e., sample size of the fitted @@ -310,7 +322,16 @@

    Arguments

    initialize default arguments for the other list elements. If local is TRUE, defaults for local are chosen such that local is transformed into -list(size = 100, method = "covariance", parallel = FALSE).

    +list(size = 100, method = "covariance", parallel = FALSE).

    +

    If block is TRUE, local accepts method and size, +and method takes values of "all", "covariance", +and "distance", similar as when block is FALSE. +The default method is "distance" with size 1000. This default size is +much larger than when block is FALSE. This is because when block is TRUE, +covariances and explanatory variables are averaged before prediction, which greatly +reduces computational burden, only requiring the Cholesky decomposition +of one observed covariance matrix. Because the computational burden is reduced dramatically when block is TRUE, +parallel processing is not needed and hence, parallel and ncores are ignored if specified in local.

    terms
    diff --git a/docs/reference/print.spmodel.html b/docs/reference/print.spmodel.html index 186b5af..27f75cc 100644 --- a/docs/reference/print.spmodel.html +++ b/docs/reference/print.spmodel.html @@ -17,7 +17,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -43,6 +43,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/pseudoR2.html b/docs/reference/pseudoR2.html index 9bbfc99..a1802ba 100644 --- a/docs/reference/pseudoR2.html +++ b/docs/reference/pseudoR2.html @@ -17,7 +17,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -43,6 +43,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/randcov_initial.html b/docs/reference/randcov_initial.html index 6c9c03a..c7f83d3 100644 --- a/docs/reference/randcov_initial.html +++ b/docs/reference/randcov_initial.html @@ -19,7 +19,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -45,6 +45,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/randcov_params.html b/docs/reference/randcov_params.html index 7bcede9..d574631 100644 --- a/docs/reference/randcov_params.html +++ b/docs/reference/randcov_params.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/reexports.html b/docs/reference/reexports.html index a5d69b3..e1675ec 100644 --- a/docs/reference/reexports.html +++ b/docs/reference/reexports.html @@ -24,7 +24,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -50,6 +50,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/residuals.spmodel.html b/docs/reference/residuals.spmodel.html index feb2a33..dbd3b3a 100644 --- a/docs/reference/residuals.spmodel.html +++ b/docs/reference/residuals.spmodel.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/seal.html b/docs/reference/seal.html index c4aaeed..b78083f 100644 --- a/docs/reference/seal.html +++ b/docs/reference/seal.html @@ -17,7 +17,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -43,6 +43,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/spautor.html b/docs/reference/spautor.html index 3d3fcb5..8d3c4ec 100644 --- a/docs/reference/spautor.html +++ b/docs/reference/spautor.html @@ -19,7 +19,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -45,6 +45,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/spautorRF.html b/docs/reference/spautorRF.html index 14148f7..68fd6fb 100644 --- a/docs/reference/spautorRF.html +++ b/docs/reference/spautorRF.html @@ -22,7 +22,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -48,6 +48,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/spcov_initial.html b/docs/reference/spcov_initial.html index d5c8fc0..77b91a9 100644 --- a/docs/reference/spcov_initial.html +++ b/docs/reference/spcov_initial.html @@ -19,7 +19,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -45,6 +45,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/spcov_params.html b/docs/reference/spcov_params.html index 405205c..0b5bd43 100644 --- a/docs/reference/spcov_params.html +++ b/docs/reference/spcov_params.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/spgautor.html b/docs/reference/spgautor.html index 7b78b7c..16b9bd2 100644 --- a/docs/reference/spgautor.html +++ b/docs/reference/spgautor.html @@ -20,7 +20,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -46,6 +46,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/spglm.html b/docs/reference/spglm.html index 5cb7df4..a12e4ef 100644 --- a/docs/reference/spglm.html +++ b/docs/reference/spglm.html @@ -20,7 +20,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -46,6 +46,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/splm.html b/docs/reference/splm.html index 4129c0e..fe2195f 100644 --- a/docs/reference/splm.html +++ b/docs/reference/splm.html @@ -20,7 +20,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -46,6 +46,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/splmRF.html b/docs/reference/splmRF.html index e58fff5..a5848be 100644 --- a/docs/reference/splmRF.html +++ b/docs/reference/splmRF.html @@ -21,7 +21,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -47,6 +47,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/spmodel-package.html b/docs/reference/spmodel-package.html index 0dd6ade..022721f 100644 --- a/docs/reference/spmodel-package.html +++ b/docs/reference/spmodel-package.html @@ -19,7 +19,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -45,6 +45,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/sprbeta.html b/docs/reference/sprbeta.html index 03bb5cb..b3390d0 100644 --- a/docs/reference/sprbeta.html +++ b/docs/reference/sprbeta.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/sprbinom.html b/docs/reference/sprbinom.html index 559e32d..6dde79d 100644 --- a/docs/reference/sprbinom.html +++ b/docs/reference/sprbinom.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/sprgamma.html b/docs/reference/sprgamma.html index 659006d..98c3b6a 100644 --- a/docs/reference/sprgamma.html +++ b/docs/reference/sprgamma.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/sprinvgauss.html b/docs/reference/sprinvgauss.html index 9ce4581..f3aef1d 100644 --- a/docs/reference/sprinvgauss.html +++ b/docs/reference/sprinvgauss.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/sprnbinom.html b/docs/reference/sprnbinom.html index b7e55f4..1b2d3d7 100644 --- a/docs/reference/sprnbinom.html +++ b/docs/reference/sprnbinom.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/sprnorm.html b/docs/reference/sprnorm.html index cb237da..4cbee47 100644 --- a/docs/reference/sprnorm.html +++ b/docs/reference/sprnorm.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/sprpois.html b/docs/reference/sprpois.html index ac1f61f..5878556 100644 --- a/docs/reference/sprpois.html +++ b/docs/reference/sprpois.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/sulfate.html b/docs/reference/sulfate.html index 1fd9554..8c0852f 100644 --- a/docs/reference/sulfate.html +++ b/docs/reference/sulfate.html @@ -17,7 +17,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -43,6 +43,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/sulfate_preds.html b/docs/reference/sulfate_preds.html index c0bbd2e..dbe784e 100644 --- a/docs/reference/sulfate_preds.html +++ b/docs/reference/sulfate_preds.html @@ -17,7 +17,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -43,6 +43,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/summary.spmodel.html b/docs/reference/summary.spmodel.html index 4717488..20b6e89 100644 --- a/docs/reference/summary.spmodel.html +++ b/docs/reference/summary.spmodel.html @@ -17,7 +17,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -43,6 +43,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/texas.html b/docs/reference/texas.html index 35ad661..434fe57 100644 --- a/docs/reference/texas.html +++ b/docs/reference/texas.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/tidy.spmodel.html b/docs/reference/tidy.spmodel.html index 4a08aa7..3d3b21c 100644 --- a/docs/reference/tidy.spmodel.html +++ b/docs/reference/tidy.spmodel.html @@ -17,7 +17,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -43,6 +43,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/varcomp.html b/docs/reference/varcomp.html index b3dbe06..495a5ed 100644 --- a/docs/reference/varcomp.html +++ b/docs/reference/varcomp.html @@ -18,7 +18,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -44,6 +44,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/reference/vcov.spmodel.html b/docs/reference/vcov.spmodel.html index 630f49c..3467af5 100644 --- a/docs/reference/vcov.spmodel.html +++ b/docs/reference/vcov.spmodel.html @@ -17,7 +17,7 @@ spmodel - 0.10.0 + 0.11.0 @@ -43,6 +43,9 @@
  • Using emmeans to Estimate Marginal Means of spmodel Objects
  • +
  • + Block Prediction (i.e., Block Kriging) in spmodel +
  • Technical Details
  • diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 7695c1b..65fcc2b 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -3,6 +3,9 @@ https://usepa.github.io/spmodel/404.html + + https://usepa.github.io/spmodel/articles/articles/block.html + https://usepa.github.io/spmodel/articles/articles/emmeans.html @@ -18,6 +21,9 @@ https://usepa.github.io/spmodel/articles/basics.html + + https://usepa.github.io/spmodel/articles/block.html + https://usepa.github.io/spmodel/articles/emmeans.html @@ -114,6 +120,9 @@ https://usepa.github.io/spmodel/reference/esv.html + + https://usepa.github.io/spmodel/reference/fc_borders.html + https://usepa.github.io/spmodel/reference/fitted.spmod.html diff --git a/inst/references.bib b/inst/references.bib index 7595a7c..0eac4ba 100644 --- a/inst/references.bib +++ b/inst/references.bib @@ -165,10 +165,11 @@ @article{cressie1985fitting } @book{cressie1993statistics, - title={Statistics for Spatial Data}, + title={Statistics for spatial data (Revised edition)}, author={Cressie, Noel}, year={1993}, - publisher={John Wiley \& Sons} + publisher={Wiley: Hoboken, NJ}, + doi={10.1002/9781119115151} } @article{cribari2010beta, @@ -201,6 +202,17 @@ @article{diamond1984robustness publisher={Springer} } +@article{dumelle2022comparison, + title={A comparison of design-based and model-based approaches for finite population spatial sampling and inference}, + author={Dumelle, Michael and Higham, Matt and Ver Hoef, Jay M and Olsen, Anthony R and Madsen, Lisa}, + journal={Methods in Ecology and Evolution}, + volume={13}, + number={9}, + pages={2018--2029}, + year={2022}, + publisher={Wiley Online Library} +} + @article{espinheira2008beta, title={On beta regression residuals}, author={Espinheira, Patr{\'i}cia L and Ferrari, Silvia LP and Cribari-Neto, Francisco}, @@ -671,6 +683,16 @@ @book{venables2002mass url = {http://www.stats.ox.ac.uk/pub/MASS4/}, } +@article{ver2008spatial, + title={Spatial methods for plot-based sampling of wildlife populations}, + author={Ver Hoef, Jay M}, + journal={Environmental and Ecological Statistics}, + volume={15}, + pages={3--13}, + year={2008}, + publisher={Springer} +} + @article{ver2012invented, title={Who invented the delta method?}, author={Ver Hoef, Jay M}, diff --git a/man/fc_borders.Rd b/man/fc_borders.Rd new file mode 100644 index 0000000..d0ed4b0 --- /dev/null +++ b/man/fc_borders.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{fc_borders} +\alias{fc_borders} +\title{Four Corners State Borders} +\format{ +An \code{sf} object with 4 rows and 4 columns: + +\itemize{ +\item NAME: State name. +\item STUSPS: State postal code. +\item GEO.ID: State GEO.ID from the United States Census Bureau. +\item geometry: \code{POINT} geometry representing coordinates in a NAD83 +projection (EPSG: 5070). Distances between points are in meters. +} +} +\source{ +The data source is the United States Census Bureau TIGER/Line Shapefiles. +} +\usage{ +fc_borders +} +\description{ +State borders for the four corners states in the United States: +Arizona, Colorado, New Mexico, Utah. +} +\keyword{datasets} diff --git a/man/predict.spmodel.Rd b/man/predict.spmodel.Rd index 9f6133f..72a4378 100644 --- a/man/predict.spmodel.Rd +++ b/man/predict.spmodel.Rd @@ -25,6 +25,7 @@ interval = c("none", "confidence", "prediction"), level = 0.95, type = c("response", "terms"), + block = FALSE, local, terms = NULL, na.action = na.fail, @@ -186,6 +187,12 @@ uncertainties (in \code{.$se.fit}).} \item{type}{The prediction type, either on the response scale, link scale (only for \code{spglm()} or \code{spgautor()} model objects), or terms scale.} +\item{block}{A logical indicating whether a block prediction over the entire region +in \code{newdata} should be returned. When \code{block} is \code{TRUE}, +\code{newdata} should be a dense grid of prediction locations that span +the entire region. The default is \code{FALSE}, which +returns point predictions for each location on \code{newdata}.} + \item{local}{A optional logical or list controlling the big data approximation. If omitted, \code{local} is set to \code{TRUE} or \code{FALSE} based on the observed data sample size (i.e., sample size of the fitted model object) -- if the sample size exceeds 10,000, \code{local} is @@ -223,7 +230,17 @@ When \code{local} is a list, at least one list element must be provided to initialize default arguments for the other list elements. If \code{local} is \code{TRUE}, defaults for \code{local} are chosen such that \code{local} is transformed into -\code{list(size = 100, method = "covariance", parallel = FALSE)}.} +\code{list(size = 100, method = "covariance", parallel = FALSE)}. + +If \code{block} is \code{TRUE}, \code{local} accepts \code{method} and \code{size}, +and \code{method} takes values of \code{"all"}, \code{"covariance"}, +and \code{"distance"}, similar as when \code{block} is \code{FALSE}. +The default \code{method} is \code{"distance"} with size \code{1000}. This default \code{size} is +much larger than when \code{block} is \code{FALSE}. This is because when \code{block} is \code{TRUE}, +covariances and explanatory variables are averaged before prediction, which greatly +reduces computational burden, only requiring the Cholesky decomposition +of one observed covariance matrix. Because the computational burden is reduced dramatically when \code{block} is \code{TRUE}, +parallel processing is not needed and hence, \code{parallel} and \code{ncores} are ignored if specified in \code{local}.} \item{terms}{If \code{type} is \code{"terms"}, the type of terms to be returned, specified via either numeric position or name. The default is all terms are included.} diff --git a/tests/testthat/test-splm.R b/tests/testthat/test-splm.R index 4206567..f07b6a9 100644 --- a/tests/testthat/test-splm.R +++ b/tests/testthat/test-splm.R @@ -115,6 +115,17 @@ test_that("generics work splm point data", { expect_true(inherits(predict(spmod1, newdata = newexdata, type = "terms"), "matrix")) expect_type(predict(spmod1, newdata = newexdata, type = "terms", interval = "confidence"), "list") + # block predict + expect_vector(predict(spmod1, newdata = newexdata, block = TRUE)) + expect_type(predict(spmod1, newdata = newexdata, interval = "prediction", se.fit = TRUE, local = TRUE, block = TRUE), "list") + expect_true(inherits(predict(spmod1, newdata = newexdata, interval = "confidence", level = 0.9, block = TRUE), "matrix")) + expect_true(inherits(predict(spmod1, newdata = newexdata, type = "terms", block = TRUE), "matrix")) + expect_type(predict(spmod1, newdata = newexdata, type = "terms", interval = "confidence", block = TRUE), "list") + expect_length(predict(spmod1, newdata = newexdata, block = TRUE), 1) + expect_length(predict(spmod1, newdata = newexdata, se.fit = TRUE, block = TRUE)$se.fit, 1) + expect_true(predict(spmod1, newdata = newexdata, se.fit = TRUE, block = TRUE)$se.fit >= 0) + expect_equal(dim(predict(spmod1, newdata = newexdata, interval = "prediction", block = TRUE)), c(1, 3)) + # print expect_output(print(spmod1)) expect_output(print(summary(spmod1))) @@ -266,6 +277,17 @@ test_that("generics work splm point data with missing", { expect_true(inherits(predict(spmod1, newdata = newexdata, type = "terms"), "matrix")) expect_type(predict(spmod1, newdata = newexdata, type = "terms", interval = "confidence"), "list") + # block predict + expect_vector(predict(spmod1, newdata = newexdata, block = TRUE)) + expect_type(predict(spmod1, newdata = newexdata, interval = "prediction", se.fit = TRUE, local = TRUE, block = TRUE), "list") + expect_true(inherits(predict(spmod1, newdata = newexdata, interval = "confidence", level = 0.9, block = TRUE), "matrix")) + expect_true(inherits(predict(spmod1, newdata = newexdata, type = "terms", block = TRUE), "matrix")) + expect_type(predict(spmod1, newdata = newexdata, type = "terms", interval = "confidence", block = TRUE), "list") + expect_length(predict(spmod1, newdata = newexdata, block = TRUE), 1) + expect_length(predict(spmod1, newdata = newexdata, se.fit = TRUE, block = TRUE)$se.fit, 1) + expect_true(predict(spmod1, newdata = newexdata, se.fit = TRUE, block = TRUE)$se.fit >= 0) + expect_equal(dim(predict(spmod1, newdata = newexdata, interval = "prediction", block = TRUE)), c(1, 3)) + # print expect_output(print(spmod1)) expect_output(print(summary(spmod1))) @@ -406,11 +428,15 @@ test_that("generics work splm polygon data with missing", { expect_invisible(plot(spmod1, which = 7)) # predict - expect_vector(predict(spmod1)) - expect_type(predict(spmod1, interval = "prediction", se.fit = TRUE, local = TRUE), "list") - expect_true(inherits(predict(spmod1, interval = "confidence", level = 0.9), "matrix")) - expect_true(inherits(predict(spmod1, type = "terms"), "matrix")) - expect_type(predict(spmod1, type = "terms", interval = "confidence"), "list") + expect_vector(predict(spmod1, block = TRUE)) + expect_type(predict(spmod1, interval = "prediction", se.fit = TRUE, local = TRUE, block = TRUE), "list") + expect_true(inherits(predict(spmod1, interval = "confidence", level = 0.9, block = TRUE), "matrix")) + expect_true(inherits(predict(spmod1, type = "terms", block = TRUE), "matrix")) + expect_type(predict(spmod1, type = "terms", interval = "confidence", block = TRUE), "list") + expect_length(predict(spmod1, block = TRUE), 1) + expect_length(predict(spmod1, se.fit = TRUE, block = TRUE)$se.fit, 1) + expect_true(predict(spmod1, se.fit = TRUE, block = TRUE)$se.fit >= 0) + expect_equal(dim(predict(spmod1, interval = "prediction", block = TRUE)), c(1, 3)) # print expect_output(print(spmod1)) diff --git a/vignettes/articles/block.Rmd b/vignettes/articles/block.Rmd new file mode 100644 index 0000000..602d3dc --- /dev/null +++ b/vignettes/articles/block.Rmd @@ -0,0 +1,175 @@ +--- +title: "Block Prediction (i.e., Block Kriging) in spmodel" +author: "Michael Dumelle, Matt Higham, and Jay M. Ver Hoef" +bibliography: '`r system.file("references.bib", package="spmodel")`' +output: + html_document: +editor_options: + chunk_output_type: console +--- + +```{r setup, include = FALSE} +# # jss style +# knitr::opts_chunk$set(prompt=TRUE, echo = TRUE, highlight = FALSE, continue = " + ", comment = "") +# options(replace.assign=TRUE, width=90, prompt="R> ") + +# rmd style +knitr::opts_chunk$set(collapse = FALSE, comment = "#>", warning = FALSE, message = FALSE) + +# loading +library(spmodel) +library(sf) +library(ggplot2) +set.seed(0) +``` + +# Introduction {#sec:introduction} + +`spmodel` is a \textbf{\textsf{R}} package used to fit, summarize, and +predict for a variety of spatial statistical models. This vignette +explores tools for predicting the average value of a response variable in a particular geographic region, an approach known as Block Prediction (i.e., Block Kriging) [@cressie1993statistics; @ver2008spatial]. Before proceeding, we +load `spmodel`, `sf`, and `ggplot2` by running +```{r, eval = FALSE} +library(spmodel) +library(sf) +library(ggplot2) +``` + +If using `spmodel` in a formal publication or report, please cite it. +Citing `spmodel` lets us devote more resources to the package in the +future. We view the `spmodel` citation by running + +```{r} +citation(package = "spmodel") +``` + +# Block Prediction (i.e., Block Kriging) in `spmodel` + +The `sulfate` data is an `sf` object that contains sulfate measurements in the conterminous United States (CONUS). We first visualize the distribution of the sulfate data by running +```{r} +ggplot(sulfate, aes(color = sulfate)) + + geom_sf(size = 2.5) + + scale_color_viridis_c() + + theme_gray(base_size = 18) +``` + +In the "Detailed Guide" vignette, we describe how to fit a spatial linear model to the sulfate data and use it to predict sulfate concentrations at the geographic locations in `sulfate_preds`. But what if we want to predict the average sulfate concentration in the entire CONUS? Or what if we want to predict the average sulfate concentration in a single state like Colorado? Or a region like the Four Corners region of CONUS? We can answer these questions using Block Prediction (i.e., Block Kriging). + +First, we fit a spatial linear model for sulfate using an intercept-only model with an exponential spatial covariance function by running +```{r} +sulfmod <- splm(sulfate ~ 1, sulfate, spcov_type = "exponential") +``` + +We can use this model to perform Block Prediction in any subregion of CONUS. Consider the four corners region of CONUS, which is composed of four states: Arizona (AZ), Colorado (CO), New Mexico (NM), and Utah (UT). `fc_borders` is a simple features object with polygonal geometry that contains the boundaries of these four states. We can visualize the states in this region by running + +```{r} +ggplot(data = fc_borders, aes(fill = STUSPS, label = STUSPS)) + + geom_sf() + + geom_sf_text() +``` + +We subset the sulfate data to the observations from the Four Corners region so that we can zoom in on their values. We implement a slight jitter (random perturbation) of the coordinates to avoid overplotting points that are very close to one another. Moreover, we use a different color scale than for the full `sulfate` data so we can better distinguish among the sulfate concentrations in the region: +```{r} +sulfate_fc <- sulfate[fc_borders, ] +sulfate_fc <- st_jitter(sulfate_fc, factor = 0.04) +ggplot() + + geom_sf(data = fc_borders, fill = "transparent") + + geom_sf(data = sulfate_fc, aes(color = sulfate), size = 2) + + scale_color_viridis_c(option = "B", limits = c(0, 8)) + + theme_bw() +``` + +The sulfate concentrations in the western part of the Four Corners region (UT, AZ) appear lower than in the eastern part (CO, NM). + +Recall that our goal is to predict the mean sulfate concentration in the entire region. Block Prediction works (in practice) by first generating a dense grid of prediction locations that cover the entire geographic region of interest (here, the Four Corners with boundaries given in `fc_borders`). We create this grid and turn the dense grid (of 5,000 points) into an `sf` object by running: +```{r} +grid_size <- 5000 +fc_grid <- st_sample(fc_borders, grid_size, type = "hexagonal") +fc_grid <- st_as_sf(fc_grid) +``` + +Depending on the size of the spatial domain, consider increasing or decreasing the grid size. The larger the grid size, the more accurate the Block Prediction, though the returns diminish rapidly and add additional computational burden. + +We visualize the spatial locations (small, black circles) on the grid with the observed sulfate concentrations overlain: +```{r} +ggplot() + + geom_sf(data = fc_borders, fill = "transparent") + + geom_sf(data = fc_grid, size = 0.2) + + geom_sf(data = sulfate_fc, aes(color = sulfate), size = 4) + + scale_color_viridis_c(option = "B", limits = c(0, 8)) + + theme_bw() +``` + +Intuitively, Block Prediction works by making point predictions at each point in the dense grid and then combining each prediction in a way that respects the uncertainties associated with the fitted spatial model. We obtain predictions at each point in the dense grid using the `sulfmod` model that was fit to all of the sulfate data in the conterminous United States by running +```{r} +fc_grid$preds <- predict(sulfmod, newdata = fc_grid) +``` + +We visualize these point predictions by running +```{r} +ggplot() + + geom_sf(data = fc_borders, fill = "transparent") + + geom_sf(data = fc_grid, aes(color = preds), size = 0.2) + + geom_sf(data = sulfate_fc, aes(color = sulfate), size = 4) + + scale_color_viridis_c(option = "B", limits = c(0, 8)) + + theme_bw() +``` + +We can combine the point predictions via Block Prediction by adding a `block = TRUE` argument to `predict()`: +```{r} +predict(sulfmod, newdata = fc_grid, block = TRUE) +``` + +Just like with `predict.splm()`, we can obtain appropriate prediction uncertainty using the `interval` argument: +```{r} +predict(sulfmod, newdata = fc_grid, block = TRUE, interval = "prediction") +``` + +Now suppose that we wanted to predict average sulfate concentrations in Colorado. First, we subset the Four Corners region to Colorado: +```{r} +co_borders <- fc_borders[fc_borders$STUSPS == "CO", ] +``` + +Then, we subset the dense grid to Colordao: +```{r} +co_grid <- fc_grid[co_borders, ] +``` + +Finally, we use Block Prediction: +```{r} +predict(sulfmod, co_grid, block = TRUE, interval = "prediction") +``` + +Alternatively, one could create a new grid for Colordao rather than subsetting the Four Corners grid. + +We write a helper function that returns the Block Prediction mean sulfate concentration for each of the four states (by subsetting the original grid): +```{r} +return_state_mean <- function(state) { + state_borders <- fc_borders[fc_borders$STUSPS == state, ] + state_grid <- fc_grid[state_borders, ] + predict(sulfmod, state_grid, block = TRUE) +} +fc_borders$mean <- vapply(fc_borders$STUSPS, return_state_mean, numeric(1)) +``` + +We can visualize each Block Prediction of mean sulfate concentration: +```{r} +ggplot() + + geom_sf(data = fc_borders, aes(fill = mean)) + + scale_fill_viridis_c(option = "B", limits = c(0, 8)) + + theme_bw() +``` + +# Some Details + +A few details: + +* If the spatial model has explanatory variables, each element in the dense Block Prediction grid must have values of those explanatory variables. One way to accomplish this, for example, is to leverage a raster whose layers contain relevant explanatory variables. + +* There is a nuanced difference between Block Prediction and the fixed effect coefficients estimated by the model. Block Prediction predicts the realized mean, while the fixed effect coefficients are estimates of the underlying process mean. For more, see @dumelle2022comparison. + +* Block Prediction is only available for point-referenced spatial linear models (i.e., models fit via `splm()`). Block Prediction is not available for areal data sets (e.g., `spgautor()`) or spatial generalized linear models (e.g., `spglm()`, `spgautor()`). Like `predict.splm()`, `predict.splm(block = TRUE)` has a `local` argument for large data sets. See `?predict.splm()` for more. + +# References {.unnumbered} + +
    diff --git a/vignettes/articles/technical.Rmd b/vignettes/articles/technical.Rmd index 16b19de..c36cc91 100644 --- a/vignettes/articles/technical.Rmd +++ b/vignettes/articles/technical.Rmd @@ -352,6 +352,22 @@ When the number of observations in the fitted model (observed data) are large or When the `local` method is `"covariance"`, $\hat{\boldsymbol{\Sigma}}_{uo}$ is computed between the observation being predicted ($\mathbf{y}_u$) and the rest of the observed data. This vector is then ordered and a number of observations (specified via the `size` argument) having the highest covariance with $\mathbf{y}_u$ are subset, yielding $\check{\boldsymbol{\Sigma}}_{uo}$, which has dimension $1 \times size$. Then similarly $\hat{\boldsymbol{\Sigma}}_o$, $\mathbf{y}_o$, and $\mathbf{X}_u$ are also subset by these `size` observations, yielding $\check{\boldsymbol{\Sigma}}_{o}$, $\check{\mathbf{y}}_o$, and $\check{\mathbf{X}}_u$, respectively. The previous prediction equations can be evaluated at $\check{\boldsymbol{\Sigma}}_{uo}$, $\check{\boldsymbol{\Sigma}}_{o}$, $\check{\mathbf{y}}_o$, and $\check{\mathbf{X}}_u$ (except for the quantity $(\mathbf{X}_o^\top \hat{\boldsymbol{\Sigma}}_o^{-1} \mathbf{X}_o)^{-1}$, which is evaluated using all the observed data) to yield predictions and standard errors. When the `local` method is `"distance"`, a similar approach is used except a number of observations (specified via the `size` argument) closest (in terms of Euclidean distance) to $\mathbf{y}_u$ are subset instead. When random effects are not used, partition factors are not used, and the spatial covariance function is monotone decreasing, `"covariance"` and `"distance"` are equivalent. This approach of subsetting the observed data by the set of locations closest in covariance or proximity to $\mathbf{y}_u$ is known as the local neighborhood approach. As long as `size` is relatively small (the default is 100), the local neighborhood approach is very computationally efficient, mainly because $\check{\boldsymbol{\Sigma}}_{o}^{-1}$ is easy to compute. Additional computational efficiency is gained by parallelizing each prediction. +### Block Prediction {#sec:predict-block-lm} + +Rather than making predictions at point-referenced locations, Block Prediction (i.e., Block Kriging) is a technique used to predict an average (or total) in some region (i.e., spatial domain). When `interval = "none"` or `interval = "prediction"`, the (empirical) Block Prediction (BP) is given by +\begin{equation}\label{eq:bp_pred} + \mathbf{\dot{y}}_B = \mathbf{X}_{B} \hat{\boldsymbol{\beta}} + \hat{\boldsymbol{\Sigma}}_{B} \hat{\boldsymbol{\Sigma}}^{-1}_{o} (\mathbf{y}_o - \mathbf{X}_o \hat{\boldsymbol{\beta}}). +\end{equation} +The quantity $\mathbf{X}_{B} = [\mathbf{x}_{1, B}, \mathbf{x}_{2, B}, ... , \mathbf{x}_{k, B}]^\top$ where $j = 1, 2, ..., k$ indexes the columns of $\mathbf{X}_{B}$ and $\mathbf{x}_{j, B} = \frac{1}{|B|}\int_B \mathbf{x}_j d\mathbf{s}$ for the $\mathbf{s}$ points in the region. The total area (volume) of the region is $|B|$ . The quantity $\hat{\boldsymbol{\Sigma}}_{B} = [\hat{\boldsymbol{\Sigma}}_1, \hat{\boldsymbol{\Sigma}}_2, ... , \hat{\boldsymbol{\Sigma}}_n]^\top$ where $i = 1, 2, ... , n$ indexes each element in $\mathbf{y}_o$ and $\hat{\boldsymbol{\Sigma}}_i = \frac{1}{|B|}\int_B \text{Cov}(\mathbf{y}_B, \text{y}_i) d\mathbf{s}$. The quantity $\text{Cov}(\mathbf{y}_B, \text{y}_i)$ represents the covariance between $\text{y}_i$ and all other points in the region. In practice, the Block Prediction integrals are approximated using summation on a fine grid of $G$ points, similar to other numerical integration techniques. That is, $\mathbf{x}_{j, B} \approx \frac{1}{|B|}\sum_{g = 1}^G \mathbf{x}_g$ and similarly for $\hat{\boldsymbol{\Sigma}}_i$, where $g$ indexes the points on the fine grid. Intuitively, these summations approximate average values in the entire region. + +When `interval = "prediction"`, the (100 $\times$ `level`)% prediction interval for $\mathbf{\dot{y}}_B$ is $\mathbf{\dot{y}}_B \pm z^* \sqrt{\sigma^2_B}$, where $\sigma^2_B = \sigma^{2*}_B - \hat{\boldsymbol{\Sigma}}_{B} \hat{\boldsymbol{\Sigma}}^{-1}_{o}\hat{\boldsymbol{\Sigma}}_{B}^\top + \mathbf{Q}_B (\mathbf{X}_o^\top \hat{\boldsymbol{\Sigma}}_o^{-1} \mathbf{X}_o)^{-1} \mathbf{Q}_B^\top$. The quantity $\sigma^{2*}_B = \frac{1}{|B|^2}\int_B \int_B \text{Cov}(\text{y}_\mathbf{s}, \text{y}_\mathbf{u}) d\mathbf{s}d\mathbf{u}$, where $\mathbf{s}$ and $\mathbf{u}$ represent points in the region (the product of $\mathbf{s}$ and $\mathbf{u}$ contains all possible pairs of points in the region). Intuitively, $\sigma^{2*}_B$ approximates the average covariance between any two points in the region though summation over the fine grid. That is, $\sigma^{2*}_B \approx \frac{1}{|B|^2}\sum_{g_i = 1}^G \sum_{g_j = 1}^G \text{Cov}(\text{y}_{g_i}, \text{y}_{g_j})$. The quantity $\mathbf{Q}_B = \mathbf{X}_B - \hat{\boldsymbol{\Sigma}}_B \hat{\boldsymbol{\Sigma}}^{-1}_o \mathbf{X}_o$. + +When `interval = "confidence"`, the average process mean (i.e., not the realized mean) and uncertainties are returned from the underlying model. The (process) mean estimate is $\mathbf{X}_{B} \hat{\boldsymbol{\beta}}$ and a (100 $\times$ `level`)% confidence interval is $\mathbf{X}_{B} \hat{\boldsymbol{\beta}} \pm z^* \sqrt{\mathbf{X}_{B} (\mathbf{X}^\top_o \hat{\boldsymbol{\Sigma}}_o^{-1} \mathbf{X}_o)^{-1} \mathbf{X}_{B}^\top}$. + +For Big Data, when `local = TRUE`, the same approach is applied as for point prediction but adjusted slightly to accommodate the averaging necessary for Block Prediction. Thus, when `method = "covariance"` (the default), the `size` observations having the highest average covariance with elements of $\mathbf{y}_o$ are used to find the subsets $\check{\mathbf{X}}_o$, $\check{\boldsymbol{\Sigma}}_o$, and $\check{\mathbf{y}}_o$. When `method = "distance"`, the `size` observations having the smallest average distance from elements of $\mathbf{y}_o$ are used to find the subsets $\check{\mathbf{X}}_o$, $\check{\boldsymbol{\Sigma}}_o$, and $\check{\mathbf{y}}_o$. Recall that these two methods are equivalent for processes without anisotropy, random effects, or partition factors, but can differ otherwise. The default is `size = 1000`, which is much larger than for point prediction. This is because for Block Prediction, the Cholesky decomposition of $\check{\boldsymbol{\Sigma}}_o$ needs to only be computed once (rather than separately for each $\check{\boldsymbol{\Sigma}}_o$ associated with each prediction location, as for point prediction). + +Currently, the fine grid used to obtain Block Predictions is supplied by the user via `newdata`. For an overview of Block Prediction, see @cressie1993statistics. For applications to a finite population (i.e., a region with a finite number of point locations), see @ver2008spatial and @dumelle2022comparison. + ### `splmRF()` and `spautorRF()` {#sec:rf_pred-lm} Random forest spatial residual model predictions are obtained by combining random forest predictions and spatial linear model predictions (i.e., Kriging) of the random forest residuals. Formally, the random forest spatial residual model predictions of $\mathbf{y}_u$ are given by \begin{equation*}