diff --git a/R/misc.R b/R/misc.R index ac76d0e..d0a1ab0 100644 --- a/R/misc.R +++ b/R/misc.R @@ -67,3 +67,19 @@ mape <- function (obs, pred) { mpe <- function (obs, pred) { sum((obs - pred)/obs)/length(obs) } + +#' Weighted sum-of-squares of residuals +#' +#' @inheritParams rmse +#' @param w weights +#' +ss <- function(obs, pred, w = NULL) { + if(is.null(w)) { + w <- rep(1, length(obs)) + } + if (length(obs) != length(pred) || length(obs) != length(w)) { + cli::cli_abort("`obs`, `pred`, and `w` must have the same length") + } + if(sum(w) == 0) return(NA) + sum(w * (obs - pred)^2) +} diff --git a/R/print_functions.R b/R/print_functions.R index e15dd2b..6a4d989 100644 --- a/R/print_functions.R +++ b/R/print_functions.R @@ -5,6 +5,10 @@ #' #' @export print.mipdeval_results <- function(x, ...) { + if(is.null(x$results)) { + cli::cli_alert_info("No forecasting info available, did you run with `vpc_only=TRUE`?") + return(invisible()) + } print(x$stats_summ, ...) print(x$shrinkage, ...) print(x$bayesian_impact, ...) diff --git a/R/run_eval.R b/R/run_eval.R index 7d57124..55890e4 100644 --- a/R/run_eval.R +++ b/R/run_eval.R @@ -22,7 +22,7 @@ #' by default (so no data leakage), but it can be switched off if we want to #' match the behavior of `PsN::proseval` exactly. #' @param incremental should MAP Bayesian do incremental fits in the iterative -#' loop. I.e. in this case it would use the first iterations MAP Bayesian +#' loop? I.e. in this case it would use the first iterations MAP Bayesian #' estimates as input for the second iteration, and so forth. The uncertainty #' around the MAP estimates would be used as the new `omega` matrix. This #' approach has been called "model predictive control (MPC)" @@ -119,6 +119,7 @@ run_eval <- function( mod_obj = mod_obj, censor_covariates = censor_covariates, weight_prior = weight_prior, + incremental = incremental, progress_function = p, .threads = threads, .skip = .vpc_options$vpc_only diff --git a/R/run_eval_core.R b/R/run_eval_core.R index b1ef6ac..08bbdf9 100644 --- a/R/run_eval_core.R +++ b/R/run_eval_core.R @@ -26,10 +26,15 @@ run_eval_core <- function( for(i in seq_along(iterations)) { - ## TODO: this will change once we support sample-weighting strategies - ## For now this is just simple full-weighting for all previous points - weights <- rep(0, nrow(obs_data)) - weights[obs_data[["_grouper"]] %in% iterations[1:i]] <- 1 + ## Select which samples should be used in fit, for regular iterative + ## forecasting and incremental. + ## TODO: handle weighting down of earlier samples + weights <- handle_sample_weighting( + obs_data, + iterations, + incremental, + i + ) ## Should covariate data be leaked? PsN::proseval does this, ## but for use in MIPD they should be censored. @@ -42,10 +47,15 @@ run_eval_core <- function( ## Do a fit with PKPDmap, using only the points ## with weight=1. The rest of the points will get a predicted value ## in the output, but they were not weighted in the fit. + mod_upd <- mod_obj + if(incremental & i > 1) { + mod_upd$parameters <- fit$parameters # take params from previous fit + mod_upd$omega <- fit$vcov + } fit <- PKPDmap::get_map_estimates( model = mod_obj$model, - parameters = mod_obj$parameters, - omega = mod_obj$omega, + parameters = mod_upd$parameters, + omega = mod_upd$omega, error = mod_obj$ruv, fixed = mod_obj$fixed, as_eta = mod_obj$kappa, @@ -65,6 +75,8 @@ run_eval_core <- function( dv = fit$dv, ipred = fit$ipred, pred = fit$pred, + ofv = fit$fit$value, + ss_w = ss(fit$dv, fit$ipred, weights), `_iteration` = iterations[i], `_grouper` = obs_data$`_grouper` ) @@ -76,6 +88,41 @@ run_eval_core <- function( ) } + ## Get data for MAP fit + if(incremental) { + ## need to do an extra MAP Bayesian fit, because we can't use the + ## incrementally updated parameters + omega + fit_map <- PKPDmap::get_map_estimates( + model = mod_obj$model, + parameters = mod_obj$parameters, # use original model params! + omega = mod_obj$omega, # use original model params! + error = mod_obj$ruv, + fixed = mod_obj$fixed, + as_eta = mod_obj$kappa, + data = data$observations, + covariates = cov_data, + regimen = data$regimen, + weight_prior = weight_prior, + weights = NULL, # no sample weighting, full MAP Bayesian on all samples + iov_bins = mod_obj$bins, + verbose = FALSE + ) + map_pred_data <- tibble::tibble( + id = obs_data$id, + t = obs_data$t, + dv = fit_map$dv, + ipred = fit_map$ipred, + pred = fit_map$pred, + ofv = fit_map$fit$value, + ss_w = ss(fit_map$dv, fit_map$ipred, w = NULL), + `_iteration` = iterations[i], + `_grouper` = obs_data$`_grouper` + ) + } else { # just take last fit object and pred_data + fit_map <- fit + map_pred_data <- pred_data + } + ## pre-pend population predictions for the first observation # TODO: Refactor this logic into a function or functions, e.g., the first # argument to bind_rows() could be refactored into `get_apriori_data()`. @@ -84,7 +131,9 @@ run_eval_core <- function( dplyr::filter(.data$`_iteration` == 1) |> dplyr::mutate( `_iteration` = 0, - ipred = .data$pred + ipred = .data$pred, + ofv = NA, + ss_w = NA ) |> # set to population parameters, not individual estimates dplyr::select(-!!names(mod_obj$parameters)) |> dplyr::left_join( @@ -96,17 +145,17 @@ run_eval_core <- function( dplyr::filter(.data$`_iteration` == (.data$`_grouper` - 1)) |> dplyr::mutate( iter_ipred = .data$ipred, - map_ipred = pred_data$ipred, # ipred from last fit (full MAP) + map_ipred = map_pred_data$ipred, # ipred from full retrospective MAP apriori = (.data$`_iteration` == 0) ) |> dplyr::select( "id", "_iteration", "_grouper", "t", "dv", "pred", "map_ipred", + "ofv", "ss_w", "iter_ipred", "apriori", !!names(mod_obj$parameters) ) out - } #' Handle covariate censoring @@ -136,3 +185,31 @@ handle_covariate_censoring <- function( } cov_data } + +#' Handle weighting of samples +#' +#' This function is used to select the samples used in the fit (1 or 0), +#' but also to select their weight, if a sample weighting strategy is +#' selected. +#' +#' @inheritParams run_eval_core +#' @param obs_data tibble or data.frame with observed data for individual +#' @param iterations numeric vector of groups +#' @param i index +#' +#' @returns vector of weights (numeric) +#' +handle_sample_weighting <- function( + obs_data, + iterations, + incremental, + i +) { + weights <- rep(0, nrow(obs_data)) + if(incremental) { # just fit current sample or group + weights[obs_data[["_grouper"]] %in% iterations[i]] <- 1 + } else { # fit all samples up until current sample + weights[obs_data[["_grouper"]] %in% iterations[1:i]] <- 1 + } + weights +} diff --git a/man/handle_sample_weighting.Rd b/man/handle_sample_weighting.Rd new file mode 100644 index 0000000..40e009d --- /dev/null +++ b/man/handle_sample_weighting.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/run_eval_core.R +\name{handle_sample_weighting} +\alias{handle_sample_weighting} +\title{Handle weighting of samples} +\usage{ +handle_sample_weighting(obs_data, iterations, incremental, i) +} +\arguments{ +\item{obs_data}{tibble or data.frame with observed data for individual} + +\item{iterations}{numeric vector of groups} + +\item{incremental}{should MAP Bayesian do incremental fits in the iterative +loop? I.e. in this case it would use the first iterations MAP Bayesian +estimates as input for the second iteration, and so forth. The uncertainty +around the MAP estimates would be used as the new \code{omega} matrix. This +approach has been called "model predictive control (MPC)" +(www.page-meeting.org/?abstract=9076) and may be more predictive than +"regular" MAP in some scenarios. Default is \code{FALSE}.} + +\item{i}{index} +} +\value{ +vector of weights (numeric) +} +\description{ +This function is used to select the samples used in the fit (1 or 0), +but also to select their weight, if a sample weighting strategy is +selected. +} diff --git a/man/run_eval.Rd b/man/run_eval.Rd index 7a318a1..49e40fa 100644 --- a/man/run_eval.Rd +++ b/man/run_eval.Rd @@ -74,7 +74,7 @@ by default (so no data leakage), but it can be switched off if we want to match the behavior of \code{PsN::proseval} exactly.} \item{incremental}{should MAP Bayesian do incremental fits in the iterative -loop. I.e. in this case it would use the first iterations MAP Bayesian +loop? I.e. in this case it would use the first iterations MAP Bayesian estimates as input for the second iteration, and so forth. The uncertainty around the MAP estimates would be used as the new \code{omega} matrix. This approach has been called "model predictive control (MPC)" diff --git a/man/run_eval_core.Rd b/man/run_eval_core.Rd index cc0195a..23de731 100644 --- a/man/run_eval_core.Rd +++ b/man/run_eval_core.Rd @@ -38,7 +38,7 @@ by default (so no data leakage), but it can be switched off if we want to match the behavior of \code{PsN::proseval} exactly.} \item{incremental}{should MAP Bayesian do incremental fits in the iterative -loop. I.e. in this case it would use the first iterations MAP Bayesian +loop? I.e. in this case it would use the first iterations MAP Bayesian estimates as input for the second iteration, and so forth. The uncertainty around the MAP estimates would be used as the new \code{omega} matrix. This approach has been called "model predictive control (MPC)" diff --git a/man/ss.Rd b/man/ss.Rd new file mode 100644 index 0000000..aef365c --- /dev/null +++ b/man/ss.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R +\name{ss} +\alias{ss} +\title{Weighted sum-of-squares of residuals} +\usage{ +ss(obs, pred, w = NULL) +} +\arguments{ +\item{obs}{observations vector} + +\item{pred}{predictions vector} + +\item{w}{weights} +} +\description{ +Weighted sum-of-squares of residuals +} diff --git a/tests/testthat/test-handle_sample_weighting.R b/tests/testthat/test-handle_sample_weighting.R new file mode 100644 index 0000000..27921f5 --- /dev/null +++ b/tests/testthat/test-handle_sample_weighting.R @@ -0,0 +1,111 @@ +test_that("handle_sample_weighting returns correct structure", { + + # Create test data + obs_data <- data.frame( + id = c(1, 1, 1, 1), + t = c(0, 12, 24, 48), + dv = c(10, 8, 6, 4), + `_grouper` = c(1, 2, 3, 4), + check.names = FALSE + ) + + iterations <- c(1, 2, 3, 4) + + result <- handle_sample_weighting(obs_data, iterations, incremental = FALSE, i = 2) + + expect_type(result, "double") + expect_length(result, nrow(obs_data)) + expect_true(all(result %in% c(0, 1))) # Weights should be 0 or 1 +}) + +test_that("handle_sample_weighting works correctly for non-incremental mode", { + obs_data <- data.frame( + `_grouper` = c(1, 2, 3, 4, 5), + check.names = FALSE + ) + + iterations <- c(1, 2, 3, 4, 5) + + # Test i = 1 (first iteration) + weights_i1 <- handle_sample_weighting(obs_data, iterations, incremental = FALSE, i = 1) + expect_equal(weights_i1, c(1, 0, 0, 0, 0)) + + # Test i = 3 (third iteration) + weights_i3 <- handle_sample_weighting(obs_data, iterations, incremental = FALSE, i = 3) + expect_equal(weights_i3, c(1, 1, 1, 0, 0)) + + # Test i = 5 (last iteration) + weights_i5 <- handle_sample_weighting(obs_data, iterations, incremental = FALSE, i = 5) + expect_equal(weights_i5, c(1, 1, 1, 1, 1)) +}) + +test_that("handle_sample_weighting works correctly for incremental mode", { + obs_data <- data.frame( + `_grouper` = c(1, 2, 3, 4, 5), + check.names = FALSE + ) + + iterations <- c(1, 2, 3, 4, 5) + + # Test i = 1 (first iteration) - should only use first group + weights_i1 <- handle_sample_weighting(obs_data, iterations, incremental = TRUE, i = 1) + expect_equal(weights_i1, c(1, 0, 0, 0, 0)) + + # Test i = 3 (third iteration) - should only use third group + weights_i3 <- handle_sample_weighting(obs_data, iterations, incremental = TRUE, i = 3) + expect_equal(weights_i3, c(0, 0, 1, 0, 0)) + + # Test i = 5 (last iteration) - should only use fifth group + weights_i5 <- handle_sample_weighting(obs_data, iterations, incremental = TRUE, i = 5) + expect_equal(weights_i5, c(0, 0, 0, 0, 1)) +}) + +test_that("handle_sample_weighting handles multiple observations per group", { + # Multiple observations can have the same _grouper value + obs_data <- data.frame( + `_grouper` = c(1, 1, 2, 2, 3, 3), + check.names = FALSE + ) + + iterations <- c(1, 2, 3) + + # Non-incremental: i = 2 should include groups 1 and 2 + weights_non_inc <- handle_sample_weighting(obs_data, iterations, incremental = FALSE, i = 2) + expect_equal(weights_non_inc, c(1, 1, 1, 1, 0, 0)) + + # Incremental: i = 2 should only include group 2 + weights_inc <- handle_sample_weighting(obs_data, iterations, incremental = TRUE, i = 2) + expect_equal(weights_inc, c(0, 0, 1, 1, 0, 0)) +}) + + +test_that("handle_sample_weighting handles edge case with single observation", { + obs_data <- data.frame( + `_grouper` = c(1), + check.names = FALSE + ) + + iterations <- c(1) + + # Both modes should give same result for single observation + weights_non_inc <- handle_sample_weighting(obs_data, iterations, incremental = FALSE, i = 1) + weights_inc <- handle_sample_weighting(obs_data, iterations, incremental = TRUE, i = 1) + + expect_equal(weights_non_inc, c(1)) + expect_equal(weights_inc, c(1)) +}) + +test_that("handle_sample_weighting maintains correct vector length", { + # Test with various data sizes + for(n_obs in c(1, 5, 10, 100)) { + obs_data <- data.frame( + `_grouper` = rep(1:min(n_obs, 10), length.out = n_obs), + check.names = FALSE + ) + + iterations <- unique(obs_data[["_grouper"]]) + + weights <- handle_sample_weighting(obs_data, iterations, incremental = FALSE, i = 1) + expect_length(weights, n_obs) + } +}) diff --git a/tests/testthat/test-run_eval.R b/tests/testthat/test-run_eval.R index 3d9cacc..4f50953 100644 --- a/tests/testthat/test-run_eval.R +++ b/tests/testthat/test-run_eval.R @@ -19,6 +19,24 @@ test_that("Basic run with vanco data + model works", { expect_equal(names(res), c("results", "mod_obj", "data", "sim", "stats_summ", "shrinkage", "bayesian_impact")) expect_s3_class(res$results, c("tbl_df", "tbl", "data.frame")) expect_s3_class(res$stats_summ, c("tbl_df", "tbl", "data.frame")) + expect_equal( + names(res$results), + c("id", "_iteration", "_grouper", "t", "dv", "pred", "map_ipred", + "ofv", "ss_w", "iter_ipred", "apriori", "CL", "V", "TH_CRCL", + "Q", "V2") + ) + expect_equal( + round(res$results$CL[1:5], 3), + c(2.99, 2.685, 2.462, 2.430, 2.439) + ) + expect_equal( + round(res$results$ofv[1:5], 3), + c(NA, 6.196, 11.846, 17.046, 21.563) + ) + expect_equal( + round(res$results$ss_w[1:5], 3), + c(NA, 16.839, 11.241, 27.358, 30.944) + ) # Using PKPDsim model library: res_2 <- run_eval( @@ -113,3 +131,35 @@ test_that("Run also works when `dictionary` is used", { expect_equal(names(res), c("results", "mod_obj", "data", "sim", "stats_summ", "shrinkage", "bayesian_impact")) }) +test_that("Incremental Bayes method works", { + local_mipdeval_options() + res <- run_eval( + model = "pkvancothomson", + data = nm_vanco, + incremental = TRUE, + progress = F, + ids = c(1:3) + ) + expect_equal(names(res), c("results", "mod_obj", "data", "sim", "stats_summ", "shrinkage", "bayesian_impact")) + expect_equal( + res$stats_summ, + structure(list( + type = c("iter_ipred", "iter_ipred", "map_ipred", +"map_ipred", "pred", "pred"), + apriori = c(0, 1, 0, 1, 0, 1), + rmse = c(4.051, 8.299, 1.96, 4.319, 4.789, 8.299), + nrmse = c(0.411, + 0.232, 0.199, 0.121, 0.486, 0.232), + mpe = c(-0.339, -0.004, + -0.045, -0.002, -0.415, -0.004), + mape = c(0.445, 0.246, 0.166, + 0.118, 0.558, 0.246) + ), + class = c("mipdeval_results_stats_summ","tbl_df", "tbl", "data.frame"), + row.names = c(NA, -6L)) + ) + expect_equal( + round(res$results$CL[1:5], 3), + c(2.99, 2.685, 2.529, 2.668, 2.666) # should be different CL progression (expect first 2 values) than from test above (non-incremental) + ) +})