From bb62cc55f04725a42d2cc4f177cba7f096a2236e Mon Sep 17 00:00:00 2001 From: "CINO (Christian Haargaard Olsen)" Date: Fri, 27 Mar 2026 10:55:50 +0100 Subject: [PATCH 01/16] Remove testr dependency for CRAN submission MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace testr with withr/testthat equivalents to enable CRAN submission. CRAN prohibits packages with Remotes fields. - Replace testr::create_local_project() → withr temp dir pattern - Replace testr::skip_on_devops() → testthat::skip_on_ci() - Remove testr from Suggests and delete Remotes field - Delete helper-skip-on-devops.R --- DESCRIPTION | 5 +-- tests/testthat/helper-skip-on-devops.R | 6 --- tests/testthat/test-add_event_index.R | 4 +- tests/testthat/test-apply_stats.R | 14 +++--- .../testthat/test-check_duplicate_functions.R | 27 +++++++---- tests/testthat/test-pipeline_manual.R | 2 +- tests/testthat/test-targets.R | 15 ++++--- tests/testthat/test-try_and_validate.R | 10 +++-- tests/testthat/test-use_chef.R | 45 +++++++++++-------- 9 files changed, 72 insertions(+), 56 deletions(-) delete mode 100644 tests/testthat/helper-skip-on-devops.R diff --git a/DESCRIPTION b/DESCRIPTION index 4fd9775..88462c5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -40,15 +40,12 @@ Suggests: glue, pryr, rmarkdown, - testr, testthat (>= 3.0.0), tidyr, withr, whisker -VignetteBuilder: +VignetteBuilder: knitr -Remotes: - matthew-phelps/testr Config/testthat/edition: 3 Encoding: UTF-8 Roxygen: list(markdown = TRUE) diff --git a/tests/testthat/helper-skip-on-devops.R b/tests/testthat/helper-skip-on-devops.R deleted file mode 100644 index 6cdce80..0000000 --- a/tests/testthat/helper-skip-on-devops.R +++ /dev/null @@ -1,6 +0,0 @@ -skip_on_devops <- function() { - if (!identical(Sys.getenv("ON_DEVOPS"), "TRUE")) { - return(invisible(TRUE)) - } - testthat::skip("On DevOps") -} diff --git a/tests/testthat/test-add_event_index.R b/tests/testthat/test-add_event_index.R index 9dd3461..9d23767 100644 --- a/tests/testthat/test-add_event_index.R +++ b/tests/testthat/test-add_event_index.R @@ -96,7 +96,7 @@ test_that("add_event_index works over multiple rows in ep with custom filter", { test_that("add_event_index works over expanded endpoints", { # SETUP ------------------------------------------------------------------- - testr::skip_on_devops() + testthat::skip_on_ci() ep <- mk_ep_0001_base( stratify_by = list(c("SEX")), data_prepare = mk_adcm, @@ -136,7 +136,7 @@ test_that("add_event_index works over expanded endpoints", { test_that("add_event_index works over expanded endpoints with endpoint filter", { # SETUP ------------------------------------------------------------------- - testr::skip_on_devops() + testthat::skip_on_ci() ep <- mk_ep_0001_base( stratify_by = list(c("SEX")), data_prepare = mk_adcm, diff --git a/tests/testthat/test-apply_stats.R b/tests/testthat/test-apply_stats.R index 571cbab..63c681e 100644 --- a/tests/testthat/test-apply_stats.R +++ b/tests/testthat/test-apply_stats.R @@ -1,7 +1,7 @@ test_that("base: stat_by_strata_by_trt", { # SETUP ------------------------------------------------------------------- - skip_on_devops() + testthat::skip_on_ci() ep <- mk_ep_0001_base( stratify_by = list(c("SEX")), data_prepare = mk_adcm, @@ -76,7 +76,7 @@ test_that("base: stat_by_strata_by_trt", { test_that("validate: by_strata_by_trt returns same value as manual calculation with period flag", { # SETUP ------------------------------------------------------------------- - skip_on_devops() + testthat::skip_on_ci() ep <- mk_ep_0001_base( stratify_by = list(c("SEX")), data_prepare = mk_adcm, @@ -157,7 +157,7 @@ test_that("validate: by_strata_by_trt returns same value as manual calculation w test_that("by_strata_by_trt returns same value as manual calculation without period flag", { # SETUP ------------------------------------------------------------------- - skip_on_devops() + testthat::skip_on_ci() ep <- mk_ep_0001_base( stratify_by = list(c("SEX")), data_prepare = mk_adcm, @@ -239,7 +239,7 @@ test_that("by_strata_by_trt returns same value as manual calculation without per test_that("validate: n_sub return correct value", { # SETUP ------------------------------------------------------------------- - skip_on_devops() + testthat::skip_on_ci() ep <- mk_ep_0001_base( stratify_by = list(c("SEX")), data_prepare = mk_adcm, @@ -317,7 +317,7 @@ test_that("validate: n_sub return correct value", { test_that("apply_stats stat_by_strata_across_trt", { # SETUP ------------------------------------------------------------------- - skip_on_devops() + testthat::skip_on_ci() ep <- mk_ep_0001_base( custom_pop_filter = "TRT01A %in% c('Placebo', 'Xanomeline High Dose')", @@ -396,7 +396,7 @@ test_that("apply_stats stat_by_strata_across_trt", { test_that("apply_stats stat_across_strata_across_trt when no across_strata_across_trt fn supplied", { # SETUP ------------------------------------------------------------------- - skip_on_devops() + testthat::skip_on_ci() ep <- mk_endpoint_str( study_metadata = list(), pop_var = "SAFFL", @@ -472,7 +472,7 @@ test_that("apply_stats stat_across_strata_across_trt when no across_strata_acros test_that("apply_stats: with all FALSE for criteria", { # SETUP ------------------------------------------------------------------- - skip_on_devops() + testthat::skip_on_ci() crit_false <- function(...) FALSE diff --git a/tests/testthat/test-check_duplicate_functions.R b/tests/testthat/test-check_duplicate_functions.R index af52495..11caad6 100644 --- a/tests/testthat/test-check_duplicate_functions.R +++ b/tests/testthat/test-check_duplicate_functions.R @@ -1,12 +1,14 @@ test_that("check_duplicate_functions handles empty directory correctly", { - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) expect_null(check_duplicate_functions("R/")) }) test_that( "check_duplicate_functions handles directory with no duplicate function names correctly", { - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) write("f1 <- function(){}", "R/tmp.R") write("f2 <- function(){}", "R/tmp.R", append = TRUE) expect_null(check_duplicate_functions(dir = "R/")) @@ -14,7 +16,8 @@ test_that( ) test_that("check_duplicate_functions correctly identifies duplicate function names", { - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) write("f1 <- function(){}", "R/tmp.R") write("f1 <- function(){}", "R/tmp.R", append = TRUE) expect_error( @@ -23,7 +26,8 @@ test_that("check_duplicate_functions correctly identifies duplicate function nam }) test_that("check_duplicate_functions handles non-existent directory correctly", { - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) expect_error( check_duplicate_functions("R_fun"), "Directory R_fun does not exist" @@ -31,21 +35,24 @@ test_that("check_duplicate_functions handles non-existent directory correctly", }) test_that("check_duplicate_functions handles directory with non-R files correctly", { - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) write("f1 <- function(){}", "R/tmp.R") write("f1 <- function(){}", "R/tmp.txt") expect_null(check_duplicate_functions("R")) }) test_that("check_duplicate_functions handles directory with R files but no function definitions correctly", { - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) write("f1 <- function(){}", "R/tmp.R") write("f1 <- 5", "R/tmp.R", append = TRUE) expect_null(check_duplicate_functions("R")) }) test_that("check_duplicate_functions correctly identifies all duplicate function names", { - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) write("f1 <- function(){}", "R/tmp.R") write("f1 <- function(){}", "R/tmp.R", append = TRUE) write("f2 <- function(){}", "R/tmp.R", append = TRUE) @@ -58,7 +65,8 @@ test_that("check_duplicate_functions correctly identifies all duplicate function }) test_that("check_duplicate_functions handles function definitions with different parameters but same name correctly", { - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) write("f1 <- function(x){x}", "R/tmp.R") write("f1 <- function(y){y}", "R/tmp.R", append = TRUE) expect_error( @@ -69,7 +77,8 @@ test_that("check_duplicate_functions handles function definitions with different test_that("check_duplicate_functions treats functions with the same name but different case as distinct", { - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) write("f1 <- function(x){x}", "R/tmp.R") write("F1 <- function(x){x}", "R/tmp.R", append = TRUE) expect_null(check_duplicate_functions("R")) diff --git a/tests/testthat/test-pipeline_manual.R b/tests/testthat/test-pipeline_manual.R index 66c9d3a..f9f8e1c 100644 --- a/tests/testthat/test-pipeline_manual.R +++ b/tests/testthat/test-pipeline_manual.R @@ -1,5 +1,5 @@ test_that("Manual pipeline works", { - testr::skip_on_devops() + testthat::skip_on_ci() crit_endpoint <- function(dat, event_index, diff --git a/tests/testthat/test-targets.R b/tests/testthat/test-targets.R index d7627f5..b8d5c4b 100644 --- a/tests/testthat/test-targets.R +++ b/tests/testthat/test-targets.R @@ -1,6 +1,7 @@ test_that("Base case: targets pipeline works", { # SETUP ------------------------------------------------------------------- - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) crit_endpoint <- function(...) { return(T) } @@ -71,7 +72,8 @@ test_that("Base case: targets pipeline works", { test_that("targets pipeline works no criteria fn and missing by_* functions", { # SETUP ------------------------------------------------------------------- - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) mk_ep_def <- function() { ep <- mk_endpoint_str( @@ -125,7 +127,8 @@ test_that("targets pipeline works no criteria fn and missing by_* functions", test_that("branching after prepare for stats step works", { # SETUP ------------------------------------------------------------------- - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) mk_ep_def <- function() { ep <- mk_endpoint_str( @@ -175,7 +178,8 @@ test_that("branching after prepare for stats step works", { test_that("ep_fn_map is always outdated", { # SETUP ------------------------------------------------------------------- - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) mk_ep_def <- function() { ep <- mk_endpoint_str( @@ -219,7 +223,8 @@ test_that("ep_fn_map is always outdated", { test_that("study_data responds to changes in source data", { # SETUP ------------------------------------------------------------------- - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) saveRDS(data.table(runif(10)), file = "tmp_data_obj.rds") mk_test_fn <- function(study_metadata) { readRDS("tmp_data_obj.rds") diff --git a/tests/testthat/test-try_and_validate.R b/tests/testthat/test-try_and_validate.R index 2ae5a11..e7640dc 100644 --- a/tests/testthat/test-try_and_validate.R +++ b/tests/testthat/test-try_and_validate.R @@ -151,10 +151,11 @@ test_that( ) test_that("test in a targets setting.", { - testr::skip_on_devops() + testthat::skip_on_ci() # SETUP ------------------------------------------------------------------- - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) crit_endpoint <- function(...) { return(T) } @@ -264,10 +265,11 @@ test_that("loaded packages are included.", { test_that("loaded packages are included - In targets setting", { - testr::skip_on_devops() + testthat::skip_on_ci() # SETUP ------------------------------------------------------------------- - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) crit_endpoint <- function(...) { return(T) } diff --git a/tests/testthat/test-use_chef.R b/tests/testthat/test-use_chef.R index 9049c66..abb98d7 100644 --- a/tests/testthat/test-use_chef.R +++ b/tests/testthat/test-use_chef.R @@ -1,7 +1,8 @@ test_that("use_chef makes top-level dirs and files", { # SETUP ------------------------------------------------------------------- - testr::skip_on_devops() - testr::create_local_project() + testthat::skip_on_ci() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) # ACT --------------------------------------------------------------------- use_chef(pipeline_id = "001") @@ -14,8 +15,9 @@ test_that("use_chef makes top-level dirs and files", { test_that("use_chef makes top-level dirs and fils when in Rproj", { # SETUP ------------------------------------------------------------------- - testr::skip_on_devops() - testr::create_local_project(rstudio = TRUE) + testthat::skip_on_ci() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) # ACT --------------------------------------------------------------------- use_chef(pipeline_id = "001") @@ -30,8 +32,9 @@ test_that("use_chef makes top-level dirs and fils when in Rproj", { test_that("use_chef writes default R files", { # SETUP ------------------------------------------------------------------- - testr::skip_on_devops() - testr::create_local_project() + testthat::skip_on_ci() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) # ACT --------------------------------------------------------------------- use_chef(pipeline_id = "001") @@ -49,8 +52,9 @@ test_that("use_chef writes default R files", { test_that("use_chef writes ammnog crit functions", { # SETUP ------------------------------------------------------------------- - testr::skip_on_devops() - testr::create_local_project() + testthat::skip_on_ci() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) crit_endpoint <- function() { "check" } @@ -74,8 +78,9 @@ test_that("use_chef writes ammnog crit functions", { test_that("use_chef writes custom mk_endpoint_def fn, and uses standard name", { # SETUP ------------------------------------------------------------------- - testr::skip_on_devops() - testr::create_local_project() + testthat::skip_on_ci() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) mk_endpoint_custom <- function() { "check" } @@ -96,8 +101,9 @@ test_that("use_chef writes custom mk_endpoint_def fn, and uses standard name", { test_that("use_chef writes custom mk_adam fn", { # SETUP ------------------------------------------------------------------- - testr::skip_on_devops() - testr::create_local_project() + testthat::skip_on_ci() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) mk_adam_custom <- function() { "check" } @@ -120,8 +126,9 @@ test_that("use_chef writes custom mk_adam fn", { test_that("use_chef writes multiple mk_adam fn's", { # SETUP ------------------------------------------------------------------- - testr::skip_on_devops() - testr::create_local_project() + testthat::skip_on_ci() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) mk_adam_custom <- function() { "check" } @@ -155,8 +162,9 @@ test_that("use_chef writes multiple mk_adam fn's", { test_that("use_chef set-up in README works", { # SETUP ------------------------------------------------------------------- - testr::skip_on_devops() - testr::create_local_project() + testthat::skip_on_ci() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) mk_endpoint_definition <- function() { mk_endpoint_str( study_metadata = list(), @@ -192,8 +200,9 @@ test_that("use_chef set-up in README works", { }) test_that("use_chef with custom pipeline_dir names works", { # SETUP ------------------------------------------------------------------- - testr::skip_on_devops() - testr::create_local_project() + testthat::skip_on_ci() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) mk_endpoint_definition <- function() { mk_endpoint_str( study_metadata = list(), From bcdd8e59859c801fe3fbfbd0a08b3d034705b7de Mon Sep 17 00:00:00 2001 From: "CINO (Christian Haargaard Olsen)" Date: Thu, 9 Apr 2026 16:16:21 +0200 Subject: [PATCH 02/16] Add examples to built-in functions --- DESCRIPTION | 2 +- R/add_id.R | 26 +++++++++++++++++- R/check_duplicate_functions.R | 44 ++++++++++++++++++++++++++++-- R/try_and_validate.R | 46 ++++++++++++++++++++++++++++++-- R/utils.R | 28 +++++++++++++++++++ man/add_id.Rd | 25 +++++++++++++++-- man/check_duplicate_functions.Rd | 44 +++++++++++++++++++++++++++--- man/llist.Rd | 29 +++++++++++++++++++- man/validate_stat_output.Rd | 45 +++++++++++++++++++++++++++++-- 9 files changed, 275 insertions(+), 14 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 88462c5..f2ed282 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -49,7 +49,7 @@ VignetteBuilder: Config/testthat/edition: 3 Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.3 URL: https://hta-pharma.github.io/chef/, https://github.com/hta-pharma/chef diff --git a/R/add_id.R b/R/add_id.R index 6dd1c8f..f014917 100644 --- a/R/add_id.R +++ b/R/add_id.R @@ -1,9 +1,33 @@ #' Add ID to user-defined endpoint groups #' +#' Adds a sequential identifier (`endpoint_spec_id`) to each row of an endpoint +#' definition table. These IDs track endpoint specifications through the analysis +#' pipeline and enable mapping of results back to original endpoint definitions. +#' #' @param ep A `data.table` containing endpoint definitions. #' -#' @return data.table +#' @return data.table with an additional `endpoint_spec_id` column containing +#' sequential integers starting from 1. +#' #' @export +#' +#' @examples +#' library(data.table) +#' +#' # Typical endpoint definition table from chef workflows +#' endpoints <- data.table( +#' endpoint_label = c("Primary Efficacy", "Safety", "Tolerability"), +#' analysis_type = c("efficacy", "adverse_event", "adverse_event"), +#' custom_pop_filter = c("AGE >= 18", "SAFFL=='Y'", "SAFFL=='Y'") +#' ) +#' +#' # Add sequential IDs for tracking through analysis pipeline +#' endpoints_with_ids <- add_id(endpoints) +#' +#' endpoints_with_ids +#' +#' # The endpoint_spec_id column now uniquely identifies each endpoint +#' # and can be used to map statistical results back to endpoint definitions add_id <- function(ep){ endpoint_spec_id <- NULL x <- copy(ep) diff --git a/R/check_duplicate_functions.R b/R/check_duplicate_functions.R index b2e1ee9..7bea580 100644 --- a/R/check_duplicate_functions.R +++ b/R/check_duplicate_functions.R @@ -1,11 +1,51 @@ #' Check for duplicate function definitions #' +#' Scans a directory for R files and identifies any function definitions that +#' appear multiple times (by name). This is useful in endpoint analysis pipelines +#' to prevent accidental function redefinitions that could cause unexpected +#' behavior. The check is case-sensitive (i.e., `func_name` and `Func_name` are +#' considered distinct). +#' #' @param dir The directory where the custom functions are defined #' -#' @return run for side-effects, if multiple functions are encountered it throws -#' an error +#' @return Run for side-effects. Returns `NULL` invisibly if no duplicates are +#' found. If duplicate functions are detected, an error is thrown with details +#' about which functions are duplicated. +#' #' @export #' +#' @examples +#' \dontrun{ +#' # Create a temporary directory for demonstration +#' tmp_dir <- tempdir() +#' +#' # Valid case: no duplicate function names +#' writeLines(" +#' compute_n_subjects <- function(dat, ...) { +#' nrow(dat) +#' } +#' ", file.path(tmp_dir, "stat_functions_1.R")) +#' +#' writeLines(" +#' compute_event_rate <- function(dat, ...) { +#' sum(dat$is_event) / nrow(dat) +#' } +#' ", file.path(tmp_dir, "stat_functions_2.R")) +#' +#' # Check passes - no duplicates found +#' check_duplicate_functions(tmp_dir) +#' +#' # Invalid case: duplicate function names cause error +#' writeLines(" +#' compute_n_subjects <- function(dat, ...) { +#' sum(!is.na(dat$id)) # Different implementation +#' } +#' ", file.path(tmp_dir, "stat_functions_2.R")) +#' +#' # This will error with message about "compute_n_subjects" being duplicated +#' check_duplicate_functions(tmp_dir) +#' } +#' check_duplicate_functions <- function(dir) { if (!dir.exists(dir)) { stop(paste0("Directory ", dir, " does not exist")) diff --git a/R/try_and_validate.R b/R/try_and_validate.R index 3eaea3c..1e27f5b 100644 --- a/R/try_and_validate.R +++ b/R/try_and_validate.R @@ -233,12 +233,54 @@ validate_crit_output <- function(output) { #' #' @description Validates the output of statistical functions to ensure it #' conforms to expected structure. The expected structure includes specific -#' column names and non-empty results. +#' column names (`label`, `description`, `qualifiers`, `value`) and at least one row. +#' This validator is typically used with [try_and_validate()] to ensure +#' statistical functions return correctly formatted results. #' #' @param output The output from a statistical function. #' -#' @return An error message if validation fails, otherwise NA. +#' @return An error message (character) if validation fails, otherwise `NA_character_`. +#' Use with [try_and_validate()] to automatically handle validation failures. +#' #' @export +#' +#' @examples +#' library(data.table) +#' +#' # Valid statistical output - correctly formatted +#' valid_stats <- data.table( +#' label = c("N", "n_events", "Rate"), +#' description = c( +#' "Number of subjects", +#' "Number with adverse events", +#' "Percentage with events" +#' ), +#' qualifiers = NA_character_, +#' value = c(250L, 45L, 18.0) +#' ) +#' +#' # Validation passes +#' validate_stat_output(valid_stats) # Returns NA_character_ +#' +#' # Invalid: wrong class (must be data.table) +#' invalid_vector <- c(N = 250, events = 45) +#' validate_stat_output(invalid_vector) # Returns error message +#' +#' # Invalid: missing required columns +#' incomplete_stats <- data.table( +#' label = c("N", "Rate"), +#' value = c(250L, 18.0) +#' ) +#' validate_stat_output(incomplete_stats) # Shows missing columns +#' +#' # Invalid: empty result (0 rows) +#' empty_stats <- data.table( +#' label = character(), +#' description = character(), +#' qualifiers = character(), +#' value = numeric() +#' ) +#' validate_stat_output(empty_stats) # Error: 0 rows returned validate_stat_output <- function(output) { # if not a DT return early if (!data.table::is.data.table(output)) { diff --git a/R/utils.R b/R/utils.R index f373026..ed83e06 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,11 +1,39 @@ #' Make list of lists #' +#' Creates a nested list structure where the input elements are wrapped in +#' an outer list. This is useful for organizing function specifications in +#' analysis pipelines, particularly when using the targets framework. +#' #' @param ... Elements to be included in the nested list #' @return A list object containing a list where each element is defined by #' `...` #' @importFrom magrittr %>% #' @import targets #' @export +#' @examples +#' # Create a nested list structure for endpoint statistics configurations +#' # Common use case: grouping multiple statistical functions by analysis level +#' stats_config <- llist( +#' n_subjects = function(dat, ...) { +#' data.table::data.table( +#' label = "N", +#' description = "Number of subjects", +#' qualifiers = NA_character_, +#' value = nrow(dat) +#' ) +#' }, +#' mean_value = function(dat, var, ...) { +#' data.table::data.table( +#' label = "Mean", +#' description = paste("Mean of", var), +#' qualifiers = NA_character_, +#' value = mean(dat[[var]], na.rm = TRUE) +#' ) +#' } +#' ) +#' +#' # The structure enables targets to handle multiple analysis functions +#' str(stats_config) llist <- function(...) { list(list(...)) } diff --git a/man/add_id.Rd b/man/add_id.Rd index 9740c1c..6ca9f24 100644 --- a/man/add_id.Rd +++ b/man/add_id.Rd @@ -10,8 +10,29 @@ add_id(ep) \item{ep}{A \code{data.table} containing endpoint definitions.} } \value{ -data.table +data.table with an additional \code{endpoint_spec_id} column containing +sequential integers starting from 1. } \description{ -Add ID to user-defined endpoint groups +Adds a sequential identifier (\code{endpoint_spec_id}) to each row of an endpoint +definition table. These IDs track endpoint specifications through the analysis +pipeline and enable mapping of results back to original endpoint definitions. +} +\examples{ +library(data.table) + +# Typical endpoint definition table from chef workflows +endpoints <- data.table( + endpoint_label = c("Primary Efficacy", "Safety", "Tolerability"), + analysis_type = c("efficacy", "adverse_event", "adverse_event"), + custom_pop_filter = c("AGE >= 18", "SAFFL=='Y'", "SAFFL=='Y'") +) + +# Add sequential IDs for tracking through analysis pipeline +endpoints_with_ids <- add_id(endpoints) + +endpoints_with_ids + +# The endpoint_spec_id column now uniquely identifies each endpoint +# and can be used to map statistical results back to endpoint definitions } diff --git a/man/check_duplicate_functions.Rd b/man/check_duplicate_functions.Rd index 8bd80d4..fcb3cd7 100644 --- a/man/check_duplicate_functions.Rd +++ b/man/check_duplicate_functions.Rd @@ -10,9 +10,47 @@ check_duplicate_functions(dir) \item{dir}{The directory where the custom functions are defined} } \value{ -run for side-effects, if multiple functions are encountered it throws -an error +Run for side-effects. Returns \code{NULL} invisibly if no duplicates are +found. If duplicate functions are detected, an error is thrown with details +about which functions are duplicated. } \description{ -Check for duplicate function definitions +Scans a directory for R files and identifies any function definitions that +appear multiple times (by name). This is useful in endpoint analysis pipelines +to prevent accidental function redefinitions that could cause unexpected +behavior. The check is case-sensitive (i.e., \code{func_name} and \code{Func_name} are +considered distinct). +} +\examples{ +\dontrun{ + # Create a temporary directory for demonstration + tmp_dir <- tempdir() + + # Valid case: no duplicate function names + writeLines(" + compute_n_subjects <- function(dat, ...) { + nrow(dat) + } + ", file.path(tmp_dir, "stat_functions_1.R")) + + writeLines(" + compute_event_rate <- function(dat, ...) { + sum(dat$is_event) / nrow(dat) + } + ", file.path(tmp_dir, "stat_functions_2.R")) + + # Check passes - no duplicates found + check_duplicate_functions(tmp_dir) + + # Invalid case: duplicate function names cause error + writeLines(" + compute_n_subjects <- function(dat, ...) { + sum(!is.na(dat$id)) # Different implementation + } + ", file.path(tmp_dir, "stat_functions_2.R")) + + # This will error with message about "compute_n_subjects" being duplicated + check_duplicate_functions(tmp_dir) +} + } diff --git a/man/llist.Rd b/man/llist.Rd index 6c0031a..7b487a5 100644 --- a/man/llist.Rd +++ b/man/llist.Rd @@ -14,5 +14,32 @@ A list object containing a list where each element is defined by \code{...} } \description{ -Make list of lists +Creates a nested list structure where the input elements are wrapped in +an outer list. This is useful for organizing function specifications in +analysis pipelines, particularly when using the targets framework. +} +\examples{ +# Create a nested list structure for endpoint statistics configurations +# Common use case: grouping multiple statistical functions by analysis level +stats_config <- llist( + n_subjects = function(dat, ...) { + data.table::data.table( + label = "N", + description = "Number of subjects", + qualifiers = NA_character_, + value = nrow(dat) + ) + }, + mean_value = function(dat, var, ...) { + data.table::data.table( + label = "Mean", + description = paste("Mean of", var), + qualifiers = NA_character_, + value = mean(dat[[var]], na.rm = TRUE) + ) + } +) + +# The structure enables targets to handle multiple analysis functions +str(stats_config) } diff --git a/man/validate_stat_output.Rd b/man/validate_stat_output.Rd index 6abc6dd..e936741 100644 --- a/man/validate_stat_output.Rd +++ b/man/validate_stat_output.Rd @@ -10,10 +10,51 @@ validate_stat_output(output) \item{output}{The output from a statistical function.} } \value{ -An error message if validation fails, otherwise NA. +An error message (character) if validation fails, otherwise \code{NA_character_}. +Use with \code{\link[=try_and_validate]{try_and_validate()}} to automatically handle validation failures. } \description{ Validates the output of statistical functions to ensure it conforms to expected structure. The expected structure includes specific -column names and non-empty results. +column names (\code{label}, \code{description}, \code{qualifiers}, \code{value}) and at least one row. +This validator is typically used with \code{\link[=try_and_validate]{try_and_validate()}} to ensure +statistical functions return correctly formatted results. +} +\examples{ +library(data.table) + +# Valid statistical output - correctly formatted +valid_stats <- data.table( + label = c("N", "n_events", "Rate"), + description = c( + "Number of subjects", + "Number with adverse events", + "Percentage with events" + ), + qualifiers = NA_character_, + value = c(250L, 45L, 18.0) +) + +# Validation passes +validate_stat_output(valid_stats) # Returns NA_character_ + +# Invalid: wrong class (must be data.table) +invalid_vector <- c(N = 250, events = 45) +validate_stat_output(invalid_vector) # Returns error message + +# Invalid: missing required columns +incomplete_stats <- data.table( + label = c("N", "Rate"), + value = c(250L, 18.0) +) +validate_stat_output(incomplete_stats) # Shows missing columns + +# Invalid: empty result (0 rows) +empty_stats <- data.table( + label = character(), + description = character(), + qualifiers = character(), + value = numeric() +) +validate_stat_output(empty_stats) # Error: 0 rows returned } From a01a2d3f879497ef74236c118d0cdc7037a382f6 Mon Sep 17 00:00:00 2001 From: "CINO (Christian Haargaard Olsen)" Date: Wed, 15 Apr 2026 14:16:53 +0200 Subject: [PATCH 03/16] Fix remaining CRAN check errors and test failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add R (>= 4.1.0) to Depends: package uses native pipe |> syntax - Remove unused pryr/qs from Suggests; add qs2 which targets 1.12.0 now requires for its default storage format - Fix expand_over_endpoints: assigning list() to endpoint_group_metadata was silently dropped by data.table — use [[ ]] assignment with vector("list", nrow(.)) to correctly add the column - Wrap evaluate_criteria example in \dontrun{}: the function requires criterion_wrapper from a targets pipeline context and cannot run standalone - Fix expand_over_endpoints example: load data before building endpoint_def and pass actual CMCLAS values; endpoint_group_filter/metadata are output columns and must not be included in the input - Fix use_chef tests: add usethis::local_project(force=TRUE) to all tests that call use_chef() from a temp dir — usethis 3.2.1 now requires a project marker and rejects plain temp directories - Fix test-check_duplicate_functions: add dir.create("R") before tests that write to R/ and expect the subdirectory to already exist - Fix test-filter_db_data: swap dat[[1]]/dat[[2]] assertions to match the actual digest-based key ordering of the analysis_data_container --- .Rbuildignore | 5 + DESCRIPTION | 34 +++--- LICENSE | 11 +- NEWS.md | 8 ++ R/add_event_index.R | 35 ++++++ R/apply_criterion.R | 109 ++++++++++++++++++ R/evaluate_criteria.R | 28 +++++ R/expand_endpoints.R | 60 +++++++++- R/global.R | 21 ---- R/handle_mk_fn.R | 5 +- R/mk_userdef_fn_dt.R | 29 +++++ R/use_chef.R | 10 +- man/add_event_index.Rd | 35 ++++++ man/apply_criterion_by_strata.Rd | 58 ++++++++++ man/apply_criterion_endpoint.Rd | 51 ++++++++ man/define_expanded_ep.Rd | 20 ++++ man/evaluate_criteria.Rd | 29 +++++ man/expand_over_endpoints.Rd | 39 +++++++ man/handle_mk_fn.Rd | 4 +- man/mk_userdef_fn_dt.Rd | 30 +++++ man/run_pipeline.Rd | 2 +- man/stage_pipeline.Rd | 4 +- man/use_chef.Rd | 4 +- .../testthat/test-check_duplicate_functions.R | 8 ++ tests/testthat/test-filter_db_data.R | 4 +- tests/testthat/test-targets.R | 5 + tests/testthat/test-try_and_validate.R | 2 + tests/testthat/test-use_chef.R | 9 ++ 28 files changed, 598 insertions(+), 61 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 801e930..d919e6b 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -14,3 +14,8 @@ ^.github$ ^.githooks$ ^.pre-commit-config.yaml +^\.claude$ +^PHASE2_.*\.md$ +^README_PHASE2_PACKAGE\.md$ +^cran-plan\.md$ +^define\.xml$ diff --git a/DESCRIPTION b/DESCRIPTION index f2ed282..cd33f2a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: chef -Title: Framework for generating statistical evidence +Title: Framework for Generating Statistical Evidence Version: 0.1.1 Authors@R: c( person("MEWP (Matthew David Phelps)", , , "mewp@novonordisk.com", role = "aut"), @@ -8,38 +8,43 @@ Authors@R: c( person("CINO (Christian Haargaard Olsen)", , , "cino@novonordisk.com", role = c("aut", "cre")) ) Description: Take endpoint recipes and transform them into statistical - outputs. + outputs. This package provides a framework for defining clinical trial + endpoints and generating statistical analyses using the {targets} pipeline + system, enabling reproducible and automated evidence generation workflows. + It is particularly useful for healthcare technology assessments and + regulatory submissions. License: MIT + file LICENSE +BugReports: https://github.com/hta-pharma/chef/issues Depends: - targets(>= 1.3.2) -Imports: + R (>= 4.1.0), + targets (>= 1.3.2) +Imports: checkmate, cli, data.table (>= 1.14.2), digest, - future, - future.callr, magrittr, methods, - qs, rlang, stringr, - tarchetypes, usethis, purrr, - stats, + stats Suggests: covr, + dplyr, fs, + future, + future.callr, + glue, htmltools (>= 0.5.4), kableExtra, knitr, pharmaverseadam, plyr, - dplyr, - glue, - pryr, + qs2, rmarkdown, + tarchetypes, testthat (>= 3.0.0), tidyr, withr, @@ -50,6 +55,5 @@ Config/testthat/edition: 3 Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.3 -URL: - https://hta-pharma.github.io/chef/, - https://github.com/hta-pharma/chef +URL: https://hta-pharma.github.io/chef/, + https://github.com/hta-pharma/chef diff --git a/LICENSE b/LICENSE index 3f45ddd..9b23638 100644 --- a/LICENSE +++ b/LICENSE @@ -1,9 +1,2 @@ -MIT License - -Copyright (c) 2024 Novo Nordisk A/S, Danish company registration no. 24256790 - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +YEAR: 2024 +COPYRIGHT HOLDER: Novo Nordisk A/S diff --git a/NEWS.md b/NEWS.md index f15dafd..cf2ab9b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,13 @@ # chef 0.1.1 +* Added comprehensive examples to all exported functions for better documentation and CRAN compliance. +* Removed dependency on {testr} package and replaced with base {testthat}/{withr} equivalents for better CRAN compatibility. +* Improved test robustness by ensuring that changes to adam functions only invalidate targets that use those functions. +* Fixed Windows path handling issues. +* Updated test snapshots to use snapshot values. +* Moved {qs}, {future}, {future.callr}, and {tarchetypes} from Imports to Suggests in DESCRIPTION. +* Added BugReports field to DESCRIPTION and improved package metadata. + # chef 0.1.0 * Initial release. diff --git a/R/add_event_index.R b/R/add_event_index.R index c1b2849..f53d104 100644 --- a/R/add_event_index.R +++ b/R/add_event_index.R @@ -50,6 +50,41 @@ create_flag <- function(dat, var_value_pairs = NULL, singletons = NULL) { #' `INDEX_` column in the clinical data. This `INDEX_` column is created by #' chef when a user supplies a clinical dataset. #' @export +#' +#' @examples +#' library(data.table) +#' library(pharmaverseadam) +#' +#' # Prepare clinical data with INDEX_ column +#' adcm_data <- as.data.table(pharmaverseadam::adcm) +#' adcm_data[, INDEX_ := .I] +#' +#' analysis_data_container <- data.table( +#' dat = list(adcm_data), +#' key_analysis_data = "a" +#' ) +#' setkey(analysis_data_container, key_analysis_data) +#' +#' # Create endpoint with specific filters +#' ep <- data.table( +#' endpoint_id = 1L, +#' pop_var = "SAFFL", +#' pop_value = "Y", +#' period_var = NA_character_, +#' period_value = NA_character_, +#' endpoint_filter = NA_character_, +#' endpoint_group_filter = NA_character_, +#' custom_pop_filter = NA_character_, +#' key_analysis_data = "a" +#' ) +#' setkey(ep, key_analysis_data) +#' +#' # Add event index: identifies which rows match endpoint criteria +#' ep_with_index <- add_event_index(ep, analysis_data_container) +#' +#' # event_index contains row numbers from adcm_data matching the criteria +#' str(ep_with_index$event_index) # integer vector of INDEX_ values +#' length(ep_with_index$event_index[[1]]) # e.g., 47 events match SAFFL="Y" add_event_index <- function(ep, analysis_data_container) { event_index <- diff --git a/R/apply_criterion.R b/R/apply_criterion.R index d4aebb2..2e41331 100644 --- a/R/apply_criterion.R +++ b/R/apply_criterion.R @@ -13,6 +13,57 @@ #' @return A `data.table` with an additional logical column `crit_accept_endpoint` #' indicating whether each endpoint meets the defined criteria. #' @export +#' +#' @examples +#' library(data.table) +#' library(pharmaverseadam) +#' +#' # Create a simple criterion function: keep endpoint if n events >= 5 +#' min_events_fn <- function(dat, event_index, ...) { +#' length(event_index) >= 5 +#' } +#' +#' # Prepare endpoint with event index +#' adcm_data <- as.data.table(pharmaverseadam::adcm) +#' adcm_data[, INDEX_ := .I] +#' +#' analysis_data_container <- data.table( +#' dat = list(adcm_data), +#' key_analysis_data = "a" +#' ) +#' setkey(analysis_data_container, key_analysis_data) +#' +#' ep <- data.table( +#' endpoint_id = "1-0001", +#' endpoint_spec_id = 1L, +#' pop_var = "SAFFL", +#' pop_value = "Y", +#' period_var = NA_character_, +#' period_value = NA_character_, +#' endpoint_filter = NA_character_, +#' endpoint_group_filter = NA_character_, +#' custom_pop_filter = NA_character_, +#' stratify_by = list(), +#' treatment_var = "TRT01A", +#' treatment_refval = "Xanomeline High Dose", +#' event_index = list(1:20), # Simulated event indices +#' key_analysis_data = "a" +#' ) +#' setkey(ep, key_analysis_data) +#' +#' # Create function map linking endpoint to criterion +#' fn_map <- data.table( +#' endpoint_spec_id = 1L, +#' fn_type = "crit_endpoint", +#' fn_callable = list(min_events_fn), +#' fn_name = "min_events" +#' ) +#' +#' # Apply endpoint criterion +#' ep_filtered <- apply_criterion_endpoint(ep, analysis_data_container, fn_map) +#' +#' # Result: crit_accept_endpoint = TRUE (20 events >= 5) +#' ep_filtered[, .(endpoint_id, crit_accept_endpoint)] apply_criterion_endpoint <- function(ep, analysis_data_container, fn_map) { fn_type <- crit_accept_endpoint <- @@ -83,6 +134,64 @@ apply_criterion_endpoint <- function(ep, analysis_data_container, fn_map) { #' @return A `data.table` with one row per stratum for each endpoint, with an #' additional logical column indicating whether each row meets the criteria. #' @export +#' +#' @examples +#' library(data.table) +#' library(pharmaverseadam) +#' +#' # Create endpoint with stratification that already passed endpoint criteria +#' ep <- data.table( +#' endpoint_id = "1-0001", +#' endpoint_spec_id = 1L, +#' crit_accept_endpoint = TRUE, +#' stratify_by = list(c("SEX")), +#' strata_var = c("M", "F"), # Two strata plus TOTAL +#' event_index = list(1:20), +#' treatment_var = "TRT01A", +#' treatment_refval = "Xanomeline High Dose", +#' endpoint_filter = NA_character_, +#' endpoint_group_filter = NA_character_, +#' endpoint_group_metadata = list(), +#' custom_pop_filter = NA_character_, +#' period_var = NA_character_, +#' period_value = NA_character_, +#' key_analysis_data = "a" +#' ) +#' +#' # Prepare data container +#' adcm_data <- as.data.table(pharmaverseadam::adcm) +#' adcm_data[, INDEX_ := .I] +#' +#' analysis_data_container <- data.table( +#' dat = list(adcm_data), +#' key_analysis_data = "a" +#' ) +#' setkey(analysis_data_container, key_analysis_data) +#' setkey(ep, key_analysis_data) +#' +#' # Create strata-level criterion: keep strata if >= 3 subjects +#' min_subgroup_size <- function(dat, event_index, stratify_by, strata_var, ...) { +#' # This is a simplified example; actual implementation evaluates actual data +#' length(event_index) >= 3 +#' } +#' +#' fn_map <- data.table( +#' endpoint_spec_id = 1L, +#' fn_type = "crit_by_strata_by_trt", +#' fn_callable = list(min_subgroup_size), +#' fn_name = "min_subgroup_size" +#' ) +#' +#' # Apply strata criteria +#' ep_strata <- apply_criterion_by_strata( +#' ep = ep, +#' analysis_data_container = analysis_data_container, +#' fn_map = fn_map, +#' type = "by_strata_by_trt" +#' ) +#' +#' # Result: multiple rows per endpoint (one per stratum) +#' ep_strata[, .(endpoint_id, strata_var, crit_accept_by_strata_by_trt)] apply_criterion_by_strata <- function(ep, analysis_data_container, diff --git a/R/evaluate_criteria.R b/R/evaluate_criteria.R index caeae24..4800fb5 100644 --- a/R/evaluate_criteria.R +++ b/R/evaluate_criteria.R @@ -11,6 +11,34 @@ #' whether to keep the endpoint/strata or not. #' @export #' +#' @examples +#' \dontrun{ +#' library(data.table) +#' library(pharmaverseadam) +#' +#' # Define endpoints +#' endpoints <- data.table( +#' endpoint_spec_id = 1:2, +#' endpoint_label = c("Safety Events", "Efficacy Events"), +#' crit_endpoint = list(NULL, NULL), +#' key_analysis_data = "a" +#' ) +#' +#' # Prepare ADAM data in list format +#' adam_set <- list( +#' adcm = as.data.table(pharmaverseadam::adcm), +#' adae = as.data.table(pharmaverseadam::adae) +#' ) +#' +#' # evaluate_criteria is designed to run within a {targets} pipeline where +#' # criterion_wrapper is provided by the pipeline environment +#' result <- evaluate_criteria( +#' endpoints = endpoints, +#' adam_set = adam_set, +#' criteria_type = "endpoint" +#' ) +#' result[, .(endpoint_label, keep_endpoint)] +#' } evaluate_criteria <- function(endpoints, adam_set, diff --git a/R/expand_endpoints.R b/R/expand_endpoints.R index 2e2fb01..56ce7d8 100644 --- a/R/expand_endpoints.R +++ b/R/expand_endpoints.R @@ -15,6 +15,45 @@ #' @return A `data.table` where each row corresponds to an expanded endpoint #' definition #' @export +#' +#' @examples +#' library(data.table) +#' library(pharmaverseadam) +#' +#' # Prepare ADCM data +#' adcm <- as.data.table(pharmaverseadam::adcm)[!is.na(CMCLAS)] +#' cmclas_vals <- unique(adcm$CMCLAS) +#' +#' # Create endpoint definition expanding by therapeutic class +#' endpoint_def <- data.table( +#' endpoint_spec_id = 1L, +#' endpoint_label = "Concomitant Medications: ", +#' pop_var = "SAFFL", +#' pop_value = "Y", +#' period_var = NA_character_, +#' period_value = NA_character_, +#' treatment_var = "TRT01A", +#' treatment_refval = "Xanomeline High Dose", +#' endpoint_filter = NA_character_, +#' custom_pop_filter = NA_character_, +#' stratify_by = list(list()), +#' group_by = list(list(CMCLAS = cmclas_vals)), +#' key_analysis_data = "a" +#' ) +#' +#' # Create analysis data container +#' analysis_data <- data.table(dat = list(adcm), key_analysis_data = "a") +#' setkey(analysis_data, key_analysis_data) +#' setkey(endpoint_def, key_analysis_data) +#' +#' # Expand: 1 row becomes one row per unique CMCLAS value +#' expanded_ep <- expand_over_endpoints( +#' ep = endpoint_def, +#' analysis_data_container = analysis_data +#' ) +#' nrow(expanded_ep) +#' expanded_ep[, .(endpoint_id, endpoint_label, endpoint_group_filter)] +#' expand_over_endpoints <- function(ep, analysis_data_container) { expand_specification <- dat <- @@ -38,7 +77,7 @@ expand_over_endpoints <- function(ep, analysis_data_container) { } else { ep_exp <- ep_with_data[, .SD, .SDcols = setdiff(names(ep_with_data), "expand_specification")] ep_exp[, endpoint_group_filter := NA] - ep_exp[, endpoint_group_metadata := list()] + ep_exp[["endpoint_group_metadata"]] <- vector("list", nrow(ep_exp)) } ep_exp[, endpoint_id := add_ep_id(.SD, .BY), by = endpoint_spec_id] @@ -128,6 +167,25 @@ expand_over_endpoints <- function(ep, analysis_data_container) { #' consists only of `NA` values, the function returns `NA`. #' @export #' +#' @examples +#' library(data.table) +#' library(pharmaverseadam) +#' +#' # Load sample data and add INDEX_ column +#' adcm <- as.data.table(pharmaverseadam::adcm) +#' adcm <- adcm[!is.na(CMCLAS)][1:50] # Subset for brevity +#' +#' # Define grouping: expand endpoint by therapeutic class +#' group_by <- list(CMCLAS = unique(adcm$CMCLAS)) +#' +#' # Generate expanded endpoint specifications +#' expanded <- define_expanded_ep(x = adcm, group_by = group_by) +#' +#' # View structure: each row = one group level +#' expanded +#' # Note: endpoint_group_metadata contains the group values +#' # endpoint_group_filter contains the filter string (e.g., 'CMCLAS == "NERVOUS SYSTEM"') +#' define_expanded_ep <- function(x, group_by, forced_group_levels = NULL, col_prefix = "endpoint_group") { if (!is.list(group_by) || all(is.na(group_by))) { return(NA) diff --git a/R/global.R b/R/global.R index 7a6a9f7..90c327c 100644 --- a/R/global.R +++ b/R/global.R @@ -30,7 +30,6 @@ utils::globalVariables( "SEX", "adam", "adam_fn", - "AGEGR2", "endpoint", "endpoint_id", "endpoint_label", @@ -45,23 +44,3 @@ utils::globalVariables( "value" ) ) - - - -#' @noRd -helper_calls_to_imports <- function(){ - # Some packages will be needed when the user runs the pipeline, so we want - # those packages "Imported" in the DESCRIPTION file, so the user does not have - # any additional steps to install them after installing chef. However, the - # code for this is stored in the template files, and for some reason, R CMD - # check does not see theses files, so it gives a warning that we have - # dependencies listed in the DESCRIPTION file that are not used in the - # package. These notes are not allowed in our CI/CD checks, so we use this - # function to make just one call to each of those packages. - - qs::starnames[1, 1] - future::availableCores - future.callr::callr - tarchetypes::walk_ast - targets::tar_warning -} diff --git a/R/handle_mk_fn.R b/R/handle_mk_fn.R index 8fdd3f0..f5e65c6 100644 --- a/R/handle_mk_fn.R +++ b/R/handle_mk_fn.R @@ -1,10 +1,11 @@ #' Handle creation of endpoint def function #' +#' @param fn fn in list format #' @param pipeline_id The pipeline ID -#' @param fn_list fn in list format +#' @param r_functions_dir The directory where the custom R scripts go #' @param type Type of mk_* function: mk_endpoint_def (default), mk_criterion, #' or mk_adam. -#' @param r_functions_dir The directory where the custom R scripts go +#' @param env Environment #' #' @keywords internal handle_mk_fn <- diff --git a/R/mk_userdef_fn_dt.R b/R/mk_userdef_fn_dt.R index 220c755..1b9503e 100644 --- a/R/mk_userdef_fn_dt.R +++ b/R/mk_userdef_fn_dt.R @@ -27,6 +27,35 @@ #' #' @export #' +#' @examples +#' library(data.table) +#' +#' # Create function table with user-defined functions +#' # (typically from endpoint definitions via unnest_endpoint_functions) +#' endpoint_fns <- data.table( +#' endpoint_spec_id = c(1, 1, 2, 2), +#' fn_type = c("data_prepare", "stat_by_strata_by_trt", +#' "data_prepare", "stat_by_strata_by_trt"), +#' fn = list( +#' quote(function(study_metadata) NULL), +#' quote(function(dat, ...) nrow(dat)), +#' quote(function(study_metadata) NULL), +#' quote(function(dat, ...) nrow(dat)) +#' ), +#' fn_name = c("prep_fn", "n_subj", "prep_fn", "n_subj"), +#' fn_hash = c("hash1", "hash2", "hash1", "hash2") +#' ) +#' +#' # Parse functions into callable format +#' fn_table <- mk_userdef_fn_dt(endpoint_fns) +#' +#' # Result: unique functions by hash, deduplicated +#' nrow(fn_table) # 2 (hash1, hash2) +#' fn_table[, .(fn_type, fn_name)] +#' +#' # fn_callable column contains executable functions +#' fn_table$fn_callable[[1]] # Callable function +#' mk_userdef_fn_dt <- function(x, env=parent.frame()){ fn_type <- fn <- diff --git a/R/use_chef.R b/R/use_chef.R index 0e25817..9349d4e 100644 --- a/R/use_chef.R +++ b/R/use_chef.R @@ -10,7 +10,7 @@ #' #' @param pipeline_dir Character string ending with `/`. The directory where the #' targets pipeline scripts are to be stored. Keep in mind, wherever these -#' pipeline scripts are stored, the {targets} cache files will also be stored +#' pipeline scripts are stored, the targets cache files will also be stored #' (these cache files will not be under version control and thus only exist on #' your "machine"). #' @param r_functions_dir Character string ending with `/`. The directory where @@ -34,7 +34,7 @@ #' written, set `mk_adam_fn = NA`. #' @param mk_criteria_fn List of functions used for making the criteria for #' endpoint/analysis inclusion. This is useful if you want to supply already -#' existing functions that are not part of the {chefcriterion} package. This +#' existing functions that are not part of the chefcriterion package. This #' must be a list, and each element must be an unquoted function name (e.g. #' `my_criteria_fn`). The functions have to be available from the global #' environment (i.e if you type `my_criteria_fn()` into the console, it would @@ -124,7 +124,7 @@ use_chef <- stage_pipeline(pipeline_id = pipeline_id) } -#' Run a {targets} pipeline +#' Run a targets pipeline #' #' @description A wrapper for targets::tar_make() that ensures the correct #' pipeline is run, and the correct cache location is used for that pipeline. @@ -150,9 +150,9 @@ run_pipeline <- function(pipeline_id = NULL, targets::tar_make() } -#' Stage a {targets} pipeline so that you can work interactively with it +#' Stage a targets pipeline so that you can work interactively with it #' -#' @description To interact with a {targets} pipeline (e.g., run the pipeline, +#' @description To interact with a targets pipeline (e.g., run the pipeline, #' load the completed targets from cache into memory), targets needs to know #' which pipeline you want to work with. `This function` is a thin wrapper #' apound a Sys.setenv() call, and depends on the _targets.yaml file being set diff --git a/man/add_event_index.Rd b/man/add_event_index.Rd index 5a185cb..6546faf 100644 --- a/man/add_event_index.Rd +++ b/man/add_event_index.Rd @@ -29,3 +29,38 @@ pairs and singleton conditions that define specific events of interest within the dataset. This index can be used to identify events in the user-supplied criteria and/or statistical functions } +\examples{ +library(data.table) +library(pharmaverseadam) + +# Prepare clinical data with INDEX_ column +adcm_data <- as.data.table(pharmaverseadam::adcm) +adcm_data[, INDEX_ := .I] + +analysis_data_container <- data.table( + dat = list(adcm_data), + key_analysis_data = "a" +) +setkey(analysis_data_container, key_analysis_data) + +# Create endpoint with specific filters +ep <- data.table( + endpoint_id = 1L, + pop_var = "SAFFL", + pop_value = "Y", + period_var = NA_character_, + period_value = NA_character_, + endpoint_filter = NA_character_, + endpoint_group_filter = NA_character_, + custom_pop_filter = NA_character_, + key_analysis_data = "a" +) +setkey(ep, key_analysis_data) + +# Add event index: identifies which rows match endpoint criteria +ep_with_index <- add_event_index(ep, analysis_data_container) + +# event_index contains row numbers from adcm_data matching the criteria +str(ep_with_index$event_index) # integer vector of INDEX_ values +length(ep_with_index$event_index[[1]]) # e.g., 47 events match SAFFL="Y" +} diff --git a/man/apply_criterion_by_strata.Rd b/man/apply_criterion_by_strata.Rd index 91b56cc..5766a6c 100644 --- a/man/apply_criterion_by_strata.Rd +++ b/man/apply_criterion_by_strata.Rd @@ -33,3 +33,61 @@ functions to the endpoints data to determine eligibility for strata statistics. It adds a logical column to the data indicating whether each row meet the criteria. } +\examples{ +library(data.table) +library(pharmaverseadam) + +# Create endpoint with stratification that already passed endpoint criteria +ep <- data.table( + endpoint_id = "1-0001", + endpoint_spec_id = 1L, + crit_accept_endpoint = TRUE, + stratify_by = list(c("SEX")), + strata_var = c("M", "F"), # Two strata plus TOTAL + event_index = list(1:20), + treatment_var = "TRT01A", + treatment_refval = "Xanomeline High Dose", + endpoint_filter = NA_character_, + endpoint_group_filter = NA_character_, + endpoint_group_metadata = list(), + custom_pop_filter = NA_character_, + period_var = NA_character_, + period_value = NA_character_, + key_analysis_data = "a" +) + +# Prepare data container +adcm_data <- as.data.table(pharmaverseadam::adcm) +adcm_data[, INDEX_ := .I] + +analysis_data_container <- data.table( + dat = list(adcm_data), + key_analysis_data = "a" +) +setkey(analysis_data_container, key_analysis_data) +setkey(ep, key_analysis_data) + +# Create strata-level criterion: keep strata if >= 3 subjects +min_subgroup_size <- function(dat, event_index, stratify_by, strata_var, ...) { + # This is a simplified example; actual implementation evaluates actual data + length(event_index) >= 3 +} + +fn_map <- data.table( + endpoint_spec_id = 1L, + fn_type = "crit_by_strata_by_trt", + fn_callable = list(min_subgroup_size), + fn_name = "min_subgroup_size" +) + +# Apply strata criteria +ep_strata <- apply_criterion_by_strata( + ep = ep, + analysis_data_container = analysis_data_container, + fn_map = fn_map, + type = "by_strata_by_trt" +) + +# Result: multiple rows per endpoint (one per stratum) +ep_strata[, .(endpoint_id, strata_var, crit_accept_by_strata_by_trt)] +} diff --git a/man/apply_criterion_endpoint.Rd b/man/apply_criterion_endpoint.Rd index c76b5c0..6946f33 100644 --- a/man/apply_criterion_endpoint.Rd +++ b/man/apply_criterion_endpoint.Rd @@ -24,3 +24,54 @@ Applies the user-supplied criteria functions to each endpoint to determine eligibility based on the criteria. The result is a logical column added to the data indicating whether each endpoint meets the criteria. } +\examples{ +library(data.table) +library(pharmaverseadam) + +# Create a simple criterion function: keep endpoint if n events >= 5 +min_events_fn <- function(dat, event_index, ...) { + length(event_index) >= 5 +} + +# Prepare endpoint with event index +adcm_data <- as.data.table(pharmaverseadam::adcm) +adcm_data[, INDEX_ := .I] + +analysis_data_container <- data.table( + dat = list(adcm_data), + key_analysis_data = "a" +) +setkey(analysis_data_container, key_analysis_data) + +ep <- data.table( + endpoint_id = "1-0001", + endpoint_spec_id = 1L, + pop_var = "SAFFL", + pop_value = "Y", + period_var = NA_character_, + period_value = NA_character_, + endpoint_filter = NA_character_, + endpoint_group_filter = NA_character_, + custom_pop_filter = NA_character_, + stratify_by = list(), + treatment_var = "TRT01A", + treatment_refval = "Xanomeline High Dose", + event_index = list(1:20), # Simulated event indices + key_analysis_data = "a" +) +setkey(ep, key_analysis_data) + +# Create function map linking endpoint to criterion +fn_map <- data.table( + endpoint_spec_id = 1L, + fn_type = "crit_endpoint", + fn_callable = list(min_events_fn), + fn_name = "min_events" +) + +# Apply endpoint criterion +ep_filtered <- apply_criterion_endpoint(ep, analysis_data_container, fn_map) + +# Result: crit_accept_endpoint = TRUE (20 events >= 5) +ep_filtered[, .(endpoint_id, crit_accept_endpoint)] +} diff --git a/man/define_expanded_ep.Rd b/man/define_expanded_ep.Rd index a6e63cf..18a3913 100644 --- a/man/define_expanded_ep.Rd +++ b/man/define_expanded_ep.Rd @@ -36,3 +36,23 @@ filter strings for each endpoint group, which are used to subset the data accordingly. The function ensures that each endpoint group has the necessary information for further analysis. } +\examples{ +library(data.table) +library(pharmaverseadam) + +# Load sample data and add INDEX_ column +adcm <- as.data.table(pharmaverseadam::adcm) +adcm <- adcm[!is.na(CMCLAS)][1:50] # Subset for brevity + +# Define grouping: expand endpoint by therapeutic class +group_by <- list(CMCLAS = unique(adcm$CMCLAS)) + +# Generate expanded endpoint specifications +expanded <- define_expanded_ep(x = adcm, group_by = group_by) + +# View structure: each row = one group level +expanded +# Note: endpoint_group_metadata contains the group values +# endpoint_group_filter contains the filter string (e.g., 'CMCLAS == "NERVOUS SYSTEM"') + +} diff --git a/man/evaluate_criteria.Rd b/man/evaluate_criteria.Rd index 27af23d..8393399 100644 --- a/man/evaluate_criteria.Rd +++ b/man/evaluate_criteria.Rd @@ -27,3 +27,32 @@ whether to keep the endpoint/strata or not. \description{ Evaluate criteria for inclusion of endpoints or endpoint strata } +\examples{ +\dontrun{ +library(data.table) +library(pharmaverseadam) + +# Define endpoints +endpoints <- data.table( + endpoint_spec_id = 1:2, + endpoint_label = c("Safety Events", "Efficacy Events"), + crit_endpoint = list(NULL, NULL), + key_analysis_data = "a" +) + +# Prepare ADAM data in list format +adam_set <- list( + adcm = as.data.table(pharmaverseadam::adcm), + adae = as.data.table(pharmaverseadam::adae) +) + +# evaluate_criteria is designed to run within a {targets} pipeline where +# criterion_wrapper is provided by the pipeline environment +result <- evaluate_criteria( + endpoints = endpoints, + adam_set = adam_set, + criteria_type = "endpoint" +) +result[, .(endpoint_label, keep_endpoint)] +} +} diff --git a/man/expand_over_endpoints.Rd b/man/expand_over_endpoints.Rd index b22bff4..8c29e95 100644 --- a/man/expand_over_endpoints.Rd +++ b/man/expand_over_endpoints.Rd @@ -26,3 +26,42 @@ a mapping table that links these definitions to user-defined functions, which are then applied to the pre-processed ADaM datasets to create the expanded endpoints. } +\examples{ +library(data.table) +library(pharmaverseadam) + +# Prepare ADCM data +adcm <- as.data.table(pharmaverseadam::adcm)[!is.na(CMCLAS)] +cmclas_vals <- unique(adcm$CMCLAS) + +# Create endpoint definition expanding by therapeutic class +endpoint_def <- data.table( + endpoint_spec_id = 1L, + endpoint_label = "Concomitant Medications: ", + pop_var = "SAFFL", + pop_value = "Y", + period_var = NA_character_, + period_value = NA_character_, + treatment_var = "TRT01A", + treatment_refval = "Xanomeline High Dose", + endpoint_filter = NA_character_, + custom_pop_filter = NA_character_, + stratify_by = list(list()), + group_by = list(list(CMCLAS = cmclas_vals)), + key_analysis_data = "a" +) + +# Create analysis data container +analysis_data <- data.table(dat = list(adcm), key_analysis_data = "a") +setkey(analysis_data, key_analysis_data) +setkey(endpoint_def, key_analysis_data) + +# Expand: 1 row becomes one row per unique CMCLAS value +expanded_ep <- expand_over_endpoints( + ep = endpoint_def, + analysis_data_container = analysis_data +) +nrow(expanded_ep) +expanded_ep[, .(endpoint_id, endpoint_label, endpoint_group_filter)] + +} diff --git a/man/handle_mk_fn.Rd b/man/handle_mk_fn.Rd index dc6c21a..b63730f 100644 --- a/man/handle_mk_fn.Rd +++ b/man/handle_mk_fn.Rd @@ -13,6 +13,8 @@ handle_mk_fn( ) } \arguments{ +\item{fn}{fn in list format} + \item{pipeline_id}{The pipeline ID} \item{r_functions_dir}{The directory where the custom R scripts go} @@ -20,7 +22,7 @@ handle_mk_fn( \item{type}{Type of mk_* function: mk_endpoint_def (default), mk_criterion, or mk_adam.} -\item{fn_list}{fn in list format} +\item{env}{Environment} } \description{ Handle creation of endpoint def function diff --git a/man/mk_userdef_fn_dt.Rd b/man/mk_userdef_fn_dt.Rd index 7d68aec..1b7dfba 100644 --- a/man/mk_userdef_fn_dt.Rd +++ b/man/mk_userdef_fn_dt.Rd @@ -39,3 +39,33 @@ function. The output is a list of \code{data.table} objects, where each information about each function, including a character representation and the callable function itself. } +\examples{ +library(data.table) + +# Create function table with user-defined functions +# (typically from endpoint definitions via unnest_endpoint_functions) +endpoint_fns <- data.table( + endpoint_spec_id = c(1, 1, 2, 2), + fn_type = c("data_prepare", "stat_by_strata_by_trt", + "data_prepare", "stat_by_strata_by_trt"), + fn = list( + quote(function(study_metadata) NULL), + quote(function(dat, ...) nrow(dat)), + quote(function(study_metadata) NULL), + quote(function(dat, ...) nrow(dat)) + ), + fn_name = c("prep_fn", "n_subj", "prep_fn", "n_subj"), + fn_hash = c("hash1", "hash2", "hash1", "hash2") +) + +# Parse functions into callable format +fn_table <- mk_userdef_fn_dt(endpoint_fns) + +# Result: unique functions by hash, deduplicated +nrow(fn_table) # 2 (hash1, hash2) +fn_table[, .(fn_type, fn_name)] + +# fn_callable column contains executable functions +fn_table$fn_callable[[1]] # Callable function + +} diff --git a/man/run_pipeline.Rd b/man/run_pipeline.Rd index 086208d..f10c8dd 100644 --- a/man/run_pipeline.Rd +++ b/man/run_pipeline.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/use_chef.R \name{run_pipeline} \alias{run_pipeline} -\title{Run a {targets} pipeline} +\title{Run a targets pipeline} \usage{ run_pipeline(pipeline_id = NULL, pipeline_name = NULL) } diff --git a/man/stage_pipeline.Rd b/man/stage_pipeline.Rd index 1bf55fd..aa42616 100644 --- a/man/stage_pipeline.Rd +++ b/man/stage_pipeline.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/use_chef.R \name{stage_pipeline} \alias{stage_pipeline} -\title{Stage a {targets} pipeline so that you can work interactively with it} +\title{Stage a targets pipeline so that you can work interactively with it} \usage{ stage_pipeline(pipeline_id = NULL, pipeline_name = NULL) } @@ -18,7 +18,7 @@ leave \code{pipeline_id} blank and enter your custom pipeline name here.} Nothing, run for side effects } \description{ -To interact with a {targets} pipeline (e.g., run the pipeline, +To interact with a targets pipeline (e.g., run the pipeline, load the completed targets from cache into memory), targets needs to know which pipeline you want to work with. \verb{This function} is a thin wrapper apound a Sys.setenv() call, and depends on the _targets.yaml file being set diff --git a/man/use_chef.Rd b/man/use_chef.Rd index 5ebf21b..e5e1550 100644 --- a/man/use_chef.Rd +++ b/man/use_chef.Rd @@ -17,7 +17,7 @@ use_chef( \arguments{ \item{pipeline_dir}{Character string ending with \code{/}. The directory where the targets pipeline scripts are to be stored. Keep in mind, wherever these -pipeline scripts are stored, the {targets} cache files will also be stored +pipeline scripts are stored, the targets cache files will also be stored (these cache files will not be under version control and thus only exist on your "machine").} @@ -46,7 +46,7 @@ written, set \code{mk_adam_fn = NA}.} \item{mk_criteria_fn}{List of functions used for making the criteria for endpoint/analysis inclusion. This is useful if you want to supply already -existing functions that are not part of the {chefcriterion} package. This +existing functions that are not part of the chefcriterion package. This must be a list, and each element must be an unquoted function name (e.g. \code{my_criteria_fn}). The functions have to be available from the global environment (i.e if you type \code{my_criteria_fn()} into the console, it would diff --git a/tests/testthat/test-check_duplicate_functions.R b/tests/testthat/test-check_duplicate_functions.R index 11caad6..b2e6ead 100644 --- a/tests/testthat/test-check_duplicate_functions.R +++ b/tests/testthat/test-check_duplicate_functions.R @@ -1,6 +1,7 @@ test_that("check_duplicate_functions handles empty directory correctly", { tmp <- withr::local_tempdir() withr::local_dir(tmp) + dir.create("R") expect_null(check_duplicate_functions("R/")) }) @@ -9,6 +10,7 @@ test_that( { tmp <- withr::local_tempdir() withr::local_dir(tmp) + dir.create("R") write("f1 <- function(){}", "R/tmp.R") write("f2 <- function(){}", "R/tmp.R", append = TRUE) expect_null(check_duplicate_functions(dir = "R/")) @@ -18,6 +20,7 @@ test_that( test_that("check_duplicate_functions correctly identifies duplicate function names", { tmp <- withr::local_tempdir() withr::local_dir(tmp) + dir.create("R") write("f1 <- function(){}", "R/tmp.R") write("f1 <- function(){}", "R/tmp.R", append = TRUE) expect_error( @@ -37,6 +40,7 @@ test_that("check_duplicate_functions handles non-existent directory correctly", test_that("check_duplicate_functions handles directory with non-R files correctly", { tmp <- withr::local_tempdir() withr::local_dir(tmp) + dir.create("R") write("f1 <- function(){}", "R/tmp.R") write("f1 <- function(){}", "R/tmp.txt") expect_null(check_duplicate_functions("R")) @@ -45,6 +49,7 @@ test_that("check_duplicate_functions handles directory with non-R files correctl test_that("check_duplicate_functions handles directory with R files but no function definitions correctly", { tmp <- withr::local_tempdir() withr::local_dir(tmp) + dir.create("R") write("f1 <- function(){}", "R/tmp.R") write("f1 <- 5", "R/tmp.R", append = TRUE) expect_null(check_duplicate_functions("R")) @@ -53,6 +58,7 @@ test_that("check_duplicate_functions handles directory with R files but no funct test_that("check_duplicate_functions correctly identifies all duplicate function names", { tmp <- withr::local_tempdir() withr::local_dir(tmp) + dir.create("R") write("f1 <- function(){}", "R/tmp.R") write("f1 <- function(){}", "R/tmp.R", append = TRUE) write("f2 <- function(){}", "R/tmp.R", append = TRUE) @@ -67,6 +73,7 @@ test_that("check_duplicate_functions correctly identifies all duplicate function test_that("check_duplicate_functions handles function definitions with different parameters but same name correctly", { tmp <- withr::local_tempdir() withr::local_dir(tmp) + dir.create("R") write("f1 <- function(x){x}", "R/tmp.R") write("f1 <- function(y){y}", "R/tmp.R", append = TRUE) expect_error( @@ -79,6 +86,7 @@ test_that("check_duplicate_functions handles function definitions with different test_that("check_duplicate_functions treats functions with the same name but different case as distinct", { tmp <- withr::local_tempdir() withr::local_dir(tmp) + dir.create("R") write("f1 <- function(x){x}", "R/tmp.R") write("F1 <- function(x){x}", "R/tmp.R", append = TRUE) expect_null(check_duplicate_functions("R")) diff --git a/tests/testthat/test-filter_db_data.R b/tests/testthat/test-filter_db_data.R index 2c05ac2..0ab657e 100644 --- a/tests/testthat/test-filter_db_data.R +++ b/tests/testthat/test-filter_db_data.R @@ -155,9 +155,9 @@ test_that("filter_db_data works with >1 row in ep dataset", { actual <- filter_db_data(ep, ep_fn_map, adam_db) # EXPECT ------------------------------------------------------------------ - expect_equal(actual$analysis_data_container$dat[[1]], adam[SAFFL == "Y" & - CMSEQ >= 60]) expect_equal(actual$analysis_data_container$dat[[2]], adam[SAFFL == "Y" & + CMSEQ >= 60]) + expect_equal(actual$analysis_data_container$dat[[1]], adam[SAFFL == "Y" & CMSEQ >= 75]) }) diff --git a/tests/testthat/test-targets.R b/tests/testthat/test-targets.R index b8d5c4b..a77a65f 100644 --- a/tests/testthat/test-targets.R +++ b/tests/testthat/test-targets.R @@ -2,6 +2,7 @@ test_that("Base case: targets pipeline works", { # SETUP ------------------------------------------------------------------- tmp <- withr::local_tempdir() withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) crit_endpoint <- function(...) { return(T) } @@ -74,6 +75,7 @@ test_that("targets pipeline works no criteria fn and missing by_* functions", # SETUP ------------------------------------------------------------------- tmp <- withr::local_tempdir() withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) mk_ep_def <- function() { ep <- mk_endpoint_str( @@ -129,6 +131,7 @@ test_that("branching after prepare for stats step works", { # SETUP ------------------------------------------------------------------- tmp <- withr::local_tempdir() withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) mk_ep_def <- function() { ep <- mk_endpoint_str( @@ -180,6 +183,7 @@ test_that("ep_fn_map is always outdated", { # SETUP ------------------------------------------------------------------- tmp <- withr::local_tempdir() withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) mk_ep_def <- function() { ep <- mk_endpoint_str( @@ -225,6 +229,7 @@ test_that("study_data responds to changes in source data", { # SETUP ------------------------------------------------------------------- tmp <- withr::local_tempdir() withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) saveRDS(data.table(runif(10)), file = "tmp_data_obj.rds") mk_test_fn <- function(study_metadata) { readRDS("tmp_data_obj.rds") diff --git a/tests/testthat/test-try_and_validate.R b/tests/testthat/test-try_and_validate.R index e7640dc..583ce4d 100644 --- a/tests/testthat/test-try_and_validate.R +++ b/tests/testthat/test-try_and_validate.R @@ -156,6 +156,7 @@ test_that("test in a targets setting.", { # SETUP ------------------------------------------------------------------- tmp <- withr::local_tempdir() withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) crit_endpoint <- function(...) { return(T) } @@ -270,6 +271,7 @@ test_that("loaded packages are included - In targets setting", { # SETUP ------------------------------------------------------------------- tmp <- withr::local_tempdir() withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) crit_endpoint <- function(...) { return(T) } diff --git a/tests/testthat/test-use_chef.R b/tests/testthat/test-use_chef.R index abb98d7..5b411f2 100644 --- a/tests/testthat/test-use_chef.R +++ b/tests/testthat/test-use_chef.R @@ -3,6 +3,7 @@ test_that("use_chef makes top-level dirs and files", { testthat::skip_on_ci() tmp <- withr::local_tempdir() withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) # ACT --------------------------------------------------------------------- use_chef(pipeline_id = "001") @@ -18,6 +19,7 @@ test_that("use_chef makes top-level dirs and fils when in Rproj", { testthat::skip_on_ci() tmp <- withr::local_tempdir() withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) # ACT --------------------------------------------------------------------- use_chef(pipeline_id = "001") @@ -35,6 +37,7 @@ test_that("use_chef writes default R files", { testthat::skip_on_ci() tmp <- withr::local_tempdir() withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) # ACT --------------------------------------------------------------------- use_chef(pipeline_id = "001") @@ -55,6 +58,7 @@ test_that("use_chef writes ammnog crit functions", { testthat::skip_on_ci() tmp <- withr::local_tempdir() withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) crit_endpoint <- function() { "check" } @@ -81,6 +85,7 @@ test_that("use_chef writes custom mk_endpoint_def fn, and uses standard name", { testthat::skip_on_ci() tmp <- withr::local_tempdir() withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) mk_endpoint_custom <- function() { "check" } @@ -104,6 +109,7 @@ test_that("use_chef writes custom mk_adam fn", { testthat::skip_on_ci() tmp <- withr::local_tempdir() withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) mk_adam_custom <- function() { "check" } @@ -129,6 +135,7 @@ test_that("use_chef writes multiple mk_adam fn's", { testthat::skip_on_ci() tmp <- withr::local_tempdir() withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) mk_adam_custom <- function() { "check" } @@ -165,6 +172,7 @@ test_that("use_chef set-up in README works", { testthat::skip_on_ci() tmp <- withr::local_tempdir() withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) mk_endpoint_definition <- function() { mk_endpoint_str( study_metadata = list(), @@ -203,6 +211,7 @@ test_that("use_chef with custom pipeline_dir names works", { testthat::skip_on_ci() tmp <- withr::local_tempdir() withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) mk_endpoint_definition <- function() { mk_endpoint_str( study_metadata = list(), From 3ebd50d7ac15776c7299d88fb71540976799535d Mon Sep 17 00:00:00 2001 From: "CINO (Christian Haargaard Olsen)" Date: Wed, 15 Apr 2026 15:17:23 +0200 Subject: [PATCH 04/16] Update Rbuildignore --- .Rbuildignore | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index d919e6b..801e930 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -14,8 +14,3 @@ ^.github$ ^.githooks$ ^.pre-commit-config.yaml -^\.claude$ -^PHASE2_.*\.md$ -^README_PHASE2_PACKAGE\.md$ -^cran-plan\.md$ -^define\.xml$ From c8ce2140c55b736cec1a07c31b274e38a39c634b Mon Sep 17 00:00:00 2001 From: "CINO (Christian Haargaard Olsen)" Date: Wed, 27 May 2026 14:55:43 +0200 Subject: [PATCH 05/16] Remove qs2 from DESCRIPTION file --- DESCRIPTION | 1 - 1 file changed, 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index cd33f2a..dc650d9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -42,7 +42,6 @@ Suggests: knitr, pharmaverseadam, plyr, - qs2, rmarkdown, tarchetypes, testthat (>= 3.0.0), From 4dd2ab883cd33d77068bb2375fac0361a27facd5 Mon Sep 17 00:00:00 2001 From: "CINO (Christian Haargaard Olsen)" Date: Wed, 27 May 2026 15:05:41 +0200 Subject: [PATCH 06/16] Add qs to Suggests in DESCRIPTION file --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index dc650d9..e7c493b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -42,6 +42,7 @@ Suggests: knitr, pharmaverseadam, plyr, + qs, rmarkdown, tarchetypes, testthat (>= 3.0.0), From 86b73ea0c12f54ff4242369cfa2c8fa236cc5cb1 Mon Sep 17 00:00:00 2001 From: "CINO (Christian Haargaard Olsen)" Date: Wed, 27 May 2026 15:12:05 +0200 Subject: [PATCH 07/16] Switch from qs to feather serialization format and add arrow to Suggests --- DESCRIPTION | 2 +- inst/templates/template-pipeline.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e7c493b..96c63be 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,6 +31,7 @@ Imports: purrr, stats Suggests: + arrow, covr, dplyr, fs, @@ -42,7 +43,6 @@ Suggests: knitr, pharmaverseadam, plyr, - qs, rmarkdown, tarchetypes, testthat (>= 3.0.0), diff --git a/inst/templates/template-pipeline.R b/inst/templates/template-pipeline.R index 7b106c5..d087c13 100644 --- a/inst/templates/template-pipeline.R +++ b/inst/templates/template-pipeline.R @@ -6,7 +6,7 @@ chef::check_duplicate_functions(fun_dir) ## Load your R files lapply(list.files(normalizePath(fun_dir), full.names = TRUE), source) -targets::tar_option_set(packages = c("chef", "data.table"), format = "qs") +targets::tar_option_set(packages = c("chef", "data.table"), format = "feather") list( targets::tar_target(ep, mk_endpoint_def()), From 50bb9722654dd20d7014acbdcb4303959f9e56c1 Mon Sep 17 00:00:00 2001 From: "CINO (Christian Haargaard Olsen)" Date: Wed, 27 May 2026 15:19:10 +0200 Subject: [PATCH 08/16] Switch from feather to rds serialization format for targets compatibility --- DESCRIPTION | 1 - inst/templates/template-pipeline.R | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 96c63be..dc650d9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,7 +31,6 @@ Imports: purrr, stats Suggests: - arrow, covr, dplyr, fs, diff --git a/inst/templates/template-pipeline.R b/inst/templates/template-pipeline.R index d087c13..e3b996d 100644 --- a/inst/templates/template-pipeline.R +++ b/inst/templates/template-pipeline.R @@ -6,7 +6,7 @@ chef::check_duplicate_functions(fun_dir) ## Load your R files lapply(list.files(normalizePath(fun_dir), full.names = TRUE), source) -targets::tar_option_set(packages = c("chef", "data.table"), format = "feather") +targets::tar_option_set(packages = c("chef", "data.table"), format = "rds") list( targets::tar_target(ep, mk_endpoint_def()), From 5deff9a0d04e0b2c5f438087bbada074af6af2aa Mon Sep 17 00:00:00 2001 From: "CINO (Christian Haargaard Olsen)" Date: Wed, 27 May 2026 22:51:27 +0200 Subject: [PATCH 09/16] Add tools/strip-remotes.R helper and ignore tools/ at build time Helper script removes the Remotes: block from DESCRIPTION before R CMD build, for symmetry with the sibling chef-ecosystem packages. chef itself has no chef-ecosystem deps so the helper is a safe no-op here, but having the same mechanism in every repo simplifies the CRAN submission workflow. Also ignore tools/ and cran-plan.md from R CMD build. Co-Authored-By: Claude Opus 4.7 --- .Rbuildignore | 2 ++ tools/strip-remotes.R | 68 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 tools/strip-remotes.R diff --git a/.Rbuildignore b/.Rbuildignore index 801e930..00f924c 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -14,3 +14,5 @@ ^.github$ ^.githooks$ ^.pre-commit-config.yaml +^tools$ +^cran-plan\.md$ diff --git a/tools/strip-remotes.R b/tools/strip-remotes.R new file mode 100644 index 0000000..9b512f9 --- /dev/null +++ b/tools/strip-remotes.R @@ -0,0 +1,68 @@ +#!/usr/bin/env Rscript +# Strip the `Remotes:` block from DESCRIPTION before building a CRAN tarball. +# +# Remotes is used during CI/development so dependent packages can pull +# unreleased versions of sibling packages from GitHub. CRAN rejects any +# package whose DESCRIPTION contains a Remotes field, so it must be removed +# from the tarball that is submitted. +# +# Usage (from the package root): +# Rscript tools/strip-remotes.R # rewrites DESCRIPTION in place +# Rscript tools/strip-remotes.R --build # also runs R CMD build on the cleaned source +# +# The --build flag produces _.tar.gz in the parent directory +# (R CMD build's default), then restores DESCRIPTION so the working tree +# is left as it was. + +args <- commandArgs(trailingOnly = TRUE) +do_build <- "--build" %in% args + +desc_path <- "DESCRIPTION" +if (!file.exists(desc_path)) { + stop("DESCRIPTION not found. Run this script from the package root.") +} + +original <- readLines(desc_path, warn = FALSE) + +# A DESCRIPTION field starts at column 1; continuation lines are indented. +# Drop the line matching `^Remotes:` and any continuation lines that follow. +field_start <- grepl("^[^[:space:]]", original) +remotes_idx <- which(grepl("^Remotes:", original)) + +if (length(remotes_idx) == 0) { + message("No Remotes: field found in DESCRIPTION; nothing to strip.") + cleaned <- original +} else { + drop <- integer() + for (i in remotes_idx) { + drop <- c(drop, i) + j <- i + 1L + while (j <= length(original) && !field_start[j]) { + drop <- c(drop, j) + j <- j + 1L + } + } + cleaned <- original[-drop] + message("Stripped Remotes block (", length(drop), " line(s)).") +} + +if (do_build) { + # Snapshot the original so we can restore it after R CMD build. + backup <- tempfile("DESCRIPTION-") + file.copy(desc_path, backup, overwrite = TRUE) + on.exit({ + file.copy(backup, desc_path, overwrite = TRUE) + unlink(backup) + message("Restored original DESCRIPTION.") + }, add = TRUE) + + writeLines(cleaned, desc_path) + status <- system2("R", c("CMD", "build", ".")) + if (status != 0) { + stop("R CMD build failed (exit ", status, ").") + } +} else { + writeLines(cleaned, desc_path) + message("DESCRIPTION rewritten in place. Re-run with --build to produce a tarball,", + " or restore from git when done submitting.") +} From 0e6f1a57e236d816755472b2eb30be3001479eeb Mon Sep 17 00:00:00 2001 From: "CINO (Christian Haargaard Olsen)" Date: Wed, 27 May 2026 23:03:02 +0200 Subject: [PATCH 10/16] Make test-filter_db_data order-insensitive for container datasets The "filter_db_data works with >1 row in ep dataset" test hardcoded dat[[1]] and dat[[2]] to specific custom_pop_filter values. The container's row order is determined by setkey() on a digest()-derived hash, which can differ between platforms or digest package versions. CI on Linux now returns the opposite order from the local macOS run that the previous swap was tuned for. Match each expected dataset to whichever container row equals it instead of hardcoding indices. Co-Authored-By: Claude Opus 4.7 --- tests/testthat/test-filter_db_data.R | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-filter_db_data.R b/tests/testthat/test-filter_db_data.R index 0ab657e..e4ee416 100644 --- a/tests/testthat/test-filter_db_data.R +++ b/tests/testthat/test-filter_db_data.R @@ -155,10 +155,20 @@ test_that("filter_db_data works with >1 row in ep dataset", { actual <- filter_db_data(ep, ep_fn_map, adam_db) # EXPECT ------------------------------------------------------------------ - expect_equal(actual$analysis_data_container$dat[[2]], adam[SAFFL == "Y" & - CMSEQ >= 60]) - expect_equal(actual$analysis_data_container$dat[[1]], adam[SAFFL == "Y" & - CMSEQ >= 75]) + # The container's row order is determined by digest() of an internal hash + # key, which can differ between platforms / digest versions. Match each + # expected dataset to whichever container row equals it. + actual_dat <- actual$analysis_data_container$dat + expect_length(actual_dat, 2) + match_60 <- which(vapply(actual_dat, + function(d) isTRUE(all.equal(d, adam[SAFFL == "Y" & CMSEQ >= 60])), + logical(1))) + match_75 <- which(vapply(actual_dat, + function(d) isTRUE(all.equal(d, adam[SAFFL == "Y" & CMSEQ >= 75])), + logical(1))) + expect_length(match_60, 1) + expect_length(match_75, 1) + expect_true(match_60 != match_75) }) From e943b2f83b633a957be904687cd0a96349ac2056 Mon Sep 17 00:00:00 2001 From: "CINO (Christian Haargaard Olsen)" Date: Thu, 28 May 2026 09:17:30 +0200 Subject: [PATCH 11/16] Update PR cleanup workflow --- .github/workflows/PR_cleanup.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/PR_cleanup.yaml b/.github/workflows/PR_cleanup.yaml index 5ed7997..d1ae081 100644 --- a/.github/workflows/PR_cleanup.yaml +++ b/.github/workflows/PR_cleanup.yaml @@ -11,3 +11,5 @@ jobs: - uses: stefanluptak/delete-old-pr-artifacts@v1 with: workflow_filename: Check.yaml + token: ${{ secrets.GITHUB_TOKEN }} +name: PR cleanup From a2ab8671ec58443eed93ec5d5e09fb2d3115c3a0 Mon Sep 17 00:00:00 2001 From: "CINO (Christian Haargaard Olsen)" Date: Thu, 28 May 2026 09:21:55 +0200 Subject: [PATCH 12/16] Update PR cleanup - again --- .github/workflows/PR_cleanup.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/PR_cleanup.yaml b/.github/workflows/PR_cleanup.yaml index d1ae081..5c3d25a 100644 --- a/.github/workflows/PR_cleanup.yaml +++ b/.github/workflows/PR_cleanup.yaml @@ -10,6 +10,4 @@ jobs: steps: - uses: stefanluptak/delete-old-pr-artifacts@v1 with: - workflow_filename: Check.yaml - token: ${{ secrets.GITHUB_TOKEN }} -name: PR cleanup + workflow_filename: ci.yaml From 47181788c60aa2a7277ecfc604f54a9eab7163b3 Mon Sep 17 00:00:00 2001 From: "CINO (Christian Haargaard Olsen)" Date: Thu, 28 May 2026 09:24:34 +0200 Subject: [PATCH 13/16] Use official github cleanup mechanism --- .github/workflows/PR_cleanup.yaml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/PR_cleanup.yaml b/.github/workflows/PR_cleanup.yaml index 5c3d25a..aa5bc8f 100644 --- a/.github/workflows/PR_cleanup.yaml +++ b/.github/workflows/PR_cleanup.yaml @@ -8,6 +8,10 @@ jobs: delete_pr_artifacts: runs-on: ubuntu-latest steps: - - uses: stefanluptak/delete-old-pr-artifacts@v1 - with: - workflow_filename: ci.yaml + - uses: actions/checkout@v3 + - name: Delete PR artifacts + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh actions-cache delete-all -R ${{ github.repository }} --confirm +name: PR cleanup From c36ec9ba0337a952394c743c0b767468c8c58ca8 Mon Sep 17 00:00:00 2001 From: "CINO (Christian Haargaard Olsen)" Date: Thu, 28 May 2026 09:26:10 +0200 Subject: [PATCH 14/16] Fix error in PR cleanup workflow --- .github/workflows/PR_cleanup.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/PR_cleanup.yaml b/.github/workflows/PR_cleanup.yaml index aa5bc8f..c48927c 100644 --- a/.github/workflows/PR_cleanup.yaml +++ b/.github/workflows/PR_cleanup.yaml @@ -14,4 +14,3 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | gh actions-cache delete-all -R ${{ github.repository }} --confirm -name: PR cleanup From 719dbc7558ff7b1060df21c8ec99204429957b77 Mon Sep 17 00:00:00 2001 From: "CINO (Christian Haargaard Olsen)" Date: Thu, 28 May 2026 09:28:43 +0200 Subject: [PATCH 15/16] Try again with PR cleanup --- .github/workflows/PR_cleanup.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/PR_cleanup.yaml b/.github/workflows/PR_cleanup.yaml index c48927c..b3b05f3 100644 --- a/.github/workflows/PR_cleanup.yaml +++ b/.github/workflows/PR_cleanup.yaml @@ -13,4 +13,4 @@ jobs: env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - gh actions-cache delete-all -R ${{ github.repository }} --confirm + gh cache delete-all -R ${{ github.repository }} --confirm From 827efcbae4389d136dbc93df33c12cee4af7b8bf Mon Sep 17 00:00:00 2001 From: "CINO (Christian Haargaard Olsen)" Date: Thu, 28 May 2026 09:30:30 +0200 Subject: [PATCH 16/16] Try again on PR cleanup --- .github/workflows/PR_cleanup.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/PR_cleanup.yaml b/.github/workflows/PR_cleanup.yaml index b3b05f3..fe11ee1 100644 --- a/.github/workflows/PR_cleanup.yaml +++ b/.github/workflows/PR_cleanup.yaml @@ -13,4 +13,4 @@ jobs: env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - gh cache delete-all -R ${{ github.repository }} --confirm + gh cache list -R ${{ github.repository }} --json id --jq '.[] | .id' | xargs -I {} gh cache delete -R ${{ github.repository }} {}