diff --git a/.Rbuildignore b/.Rbuildignore index 801e930..00f924c 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -14,3 +14,5 @@ ^.github$ ^.githooks$ ^.pre-commit-config.yaml +^tools$ +^cran-plan\.md$ diff --git a/.github/workflows/PR_cleanup.yaml b/.github/workflows/PR_cleanup.yaml index 5ed7997..fe11ee1 100644 --- a/.github/workflows/PR_cleanup.yaml +++ b/.github/workflows/PR_cleanup.yaml @@ -8,6 +8,9 @@ jobs: delete_pr_artifacts: runs-on: ubuntu-latest steps: - - uses: stefanluptak/delete-old-pr-artifacts@v1 - with: - workflow_filename: Check.yaml + - uses: actions/checkout@v3 + - name: Delete PR artifacts + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh cache list -R ${{ github.repository }} --json id --jq '.[] | .id' | xargs -I {} gh cache delete -R ${{ github.repository }} {} diff --git a/DESCRIPTION b/DESCRIPTION index 4fd9775..dc650d9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: chef -Title: Framework for generating statistical evidence +Title: Framework for Generating Statistical Evidence Version: 0.1.1 Authors@R: c( person("MEWP (Matthew David Phelps)", , , "mewp@novonordisk.com", role = "aut"), @@ -8,51 +8,51 @@ Authors@R: c( person("CINO (Christian Haargaard Olsen)", , , "cino@novonordisk.com", role = c("aut", "cre")) ) Description: Take endpoint recipes and transform them into statistical - outputs. + outputs. This package provides a framework for defining clinical trial + endpoints and generating statistical analyses using the {targets} pipeline + system, enabling reproducible and automated evidence generation workflows. + It is particularly useful for healthcare technology assessments and + regulatory submissions. License: MIT + file LICENSE +BugReports: https://github.com/hta-pharma/chef/issues Depends: - targets(>= 1.3.2) -Imports: + R (>= 4.1.0), + targets (>= 1.3.2) +Imports: checkmate, cli, data.table (>= 1.14.2), digest, - future, - future.callr, magrittr, methods, - qs, rlang, stringr, - tarchetypes, usethis, purrr, - stats, + stats Suggests: covr, + dplyr, fs, + future, + future.callr, + glue, htmltools (>= 0.5.4), kableExtra, knitr, pharmaverseadam, plyr, - dplyr, - glue, - pryr, rmarkdown, - testr, + tarchetypes, testthat (>= 3.0.0), tidyr, withr, whisker -VignetteBuilder: +VignetteBuilder: knitr -Remotes: - matthew-phelps/testr Config/testthat/edition: 3 Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 -URL: - https://hta-pharma.github.io/chef/, - https://github.com/hta-pharma/chef +RoxygenNote: 7.3.3 +URL: https://hta-pharma.github.io/chef/, + https://github.com/hta-pharma/chef diff --git a/LICENSE b/LICENSE index 3f45ddd..9b23638 100644 --- a/LICENSE +++ b/LICENSE @@ -1,9 +1,2 @@ -MIT License - -Copyright (c) 2024 Novo Nordisk A/S, Danish company registration no. 24256790 - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +YEAR: 2024 +COPYRIGHT HOLDER: Novo Nordisk A/S diff --git a/NEWS.md b/NEWS.md index f15dafd..cf2ab9b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,13 @@ # chef 0.1.1 +* Added comprehensive examples to all exported functions for better documentation and CRAN compliance. +* Removed dependency on {testr} package and replaced with base {testthat}/{withr} equivalents for better CRAN compatibility. +* Improved test robustness by ensuring that changes to adam functions only invalidate targets that use those functions. +* Fixed Windows path handling issues. +* Updated test snapshots to use snapshot values. +* Moved {qs}, {future}, {future.callr}, and {tarchetypes} from Imports to Suggests in DESCRIPTION. +* Added BugReports field to DESCRIPTION and improved package metadata. + # chef 0.1.0 * Initial release. diff --git a/R/add_event_index.R b/R/add_event_index.R index c1b2849..f53d104 100644 --- a/R/add_event_index.R +++ b/R/add_event_index.R @@ -50,6 +50,41 @@ create_flag <- function(dat, var_value_pairs = NULL, singletons = NULL) { #' `INDEX_` column in the clinical data. This `INDEX_` column is created by #' chef when a user supplies a clinical dataset. #' @export +#' +#' @examples +#' library(data.table) +#' library(pharmaverseadam) +#' +#' # Prepare clinical data with INDEX_ column +#' adcm_data <- as.data.table(pharmaverseadam::adcm) +#' adcm_data[, INDEX_ := .I] +#' +#' analysis_data_container <- data.table( +#' dat = list(adcm_data), +#' key_analysis_data = "a" +#' ) +#' setkey(analysis_data_container, key_analysis_data) +#' +#' # Create endpoint with specific filters +#' ep <- data.table( +#' endpoint_id = 1L, +#' pop_var = "SAFFL", +#' pop_value = "Y", +#' period_var = NA_character_, +#' period_value = NA_character_, +#' endpoint_filter = NA_character_, +#' endpoint_group_filter = NA_character_, +#' custom_pop_filter = NA_character_, +#' key_analysis_data = "a" +#' ) +#' setkey(ep, key_analysis_data) +#' +#' # Add event index: identifies which rows match endpoint criteria +#' ep_with_index <- add_event_index(ep, analysis_data_container) +#' +#' # event_index contains row numbers from adcm_data matching the criteria +#' str(ep_with_index$event_index) # integer vector of INDEX_ values +#' length(ep_with_index$event_index[[1]]) # e.g., 47 events match SAFFL="Y" add_event_index <- function(ep, analysis_data_container) { event_index <- diff --git a/R/add_id.R b/R/add_id.R index 6dd1c8f..f014917 100644 --- a/R/add_id.R +++ b/R/add_id.R @@ -1,9 +1,33 @@ #' Add ID to user-defined endpoint groups #' +#' Adds a sequential identifier (`endpoint_spec_id`) to each row of an endpoint +#' definition table. These IDs track endpoint specifications through the analysis +#' pipeline and enable mapping of results back to original endpoint definitions. +#' #' @param ep A `data.table` containing endpoint definitions. #' -#' @return data.table +#' @return data.table with an additional `endpoint_spec_id` column containing +#' sequential integers starting from 1. +#' #' @export +#' +#' @examples +#' library(data.table) +#' +#' # Typical endpoint definition table from chef workflows +#' endpoints <- data.table( +#' endpoint_label = c("Primary Efficacy", "Safety", "Tolerability"), +#' analysis_type = c("efficacy", "adverse_event", "adverse_event"), +#' custom_pop_filter = c("AGE >= 18", "SAFFL=='Y'", "SAFFL=='Y'") +#' ) +#' +#' # Add sequential IDs for tracking through analysis pipeline +#' endpoints_with_ids <- add_id(endpoints) +#' +#' endpoints_with_ids +#' +#' # The endpoint_spec_id column now uniquely identifies each endpoint +#' # and can be used to map statistical results back to endpoint definitions add_id <- function(ep){ endpoint_spec_id <- NULL x <- copy(ep) diff --git a/R/apply_criterion.R b/R/apply_criterion.R index d4aebb2..2e41331 100644 --- a/R/apply_criterion.R +++ b/R/apply_criterion.R @@ -13,6 +13,57 @@ #' @return A `data.table` with an additional logical column `crit_accept_endpoint` #' indicating whether each endpoint meets the defined criteria. #' @export +#' +#' @examples +#' library(data.table) +#' library(pharmaverseadam) +#' +#' # Create a simple criterion function: keep endpoint if n events >= 5 +#' min_events_fn <- function(dat, event_index, ...) { +#' length(event_index) >= 5 +#' } +#' +#' # Prepare endpoint with event index +#' adcm_data <- as.data.table(pharmaverseadam::adcm) +#' adcm_data[, INDEX_ := .I] +#' +#' analysis_data_container <- data.table( +#' dat = list(adcm_data), +#' key_analysis_data = "a" +#' ) +#' setkey(analysis_data_container, key_analysis_data) +#' +#' ep <- data.table( +#' endpoint_id = "1-0001", +#' endpoint_spec_id = 1L, +#' pop_var = "SAFFL", +#' pop_value = "Y", +#' period_var = NA_character_, +#' period_value = NA_character_, +#' endpoint_filter = NA_character_, +#' endpoint_group_filter = NA_character_, +#' custom_pop_filter = NA_character_, +#' stratify_by = list(), +#' treatment_var = "TRT01A", +#' treatment_refval = "Xanomeline High Dose", +#' event_index = list(1:20), # Simulated event indices +#' key_analysis_data = "a" +#' ) +#' setkey(ep, key_analysis_data) +#' +#' # Create function map linking endpoint to criterion +#' fn_map <- data.table( +#' endpoint_spec_id = 1L, +#' fn_type = "crit_endpoint", +#' fn_callable = list(min_events_fn), +#' fn_name = "min_events" +#' ) +#' +#' # Apply endpoint criterion +#' ep_filtered <- apply_criterion_endpoint(ep, analysis_data_container, fn_map) +#' +#' # Result: crit_accept_endpoint = TRUE (20 events >= 5) +#' ep_filtered[, .(endpoint_id, crit_accept_endpoint)] apply_criterion_endpoint <- function(ep, analysis_data_container, fn_map) { fn_type <- crit_accept_endpoint <- @@ -83,6 +134,64 @@ apply_criterion_endpoint <- function(ep, analysis_data_container, fn_map) { #' @return A `data.table` with one row per stratum for each endpoint, with an #' additional logical column indicating whether each row meets the criteria. #' @export +#' +#' @examples +#' library(data.table) +#' library(pharmaverseadam) +#' +#' # Create endpoint with stratification that already passed endpoint criteria +#' ep <- data.table( +#' endpoint_id = "1-0001", +#' endpoint_spec_id = 1L, +#' crit_accept_endpoint = TRUE, +#' stratify_by = list(c("SEX")), +#' strata_var = c("M", "F"), # Two strata plus TOTAL +#' event_index = list(1:20), +#' treatment_var = "TRT01A", +#' treatment_refval = "Xanomeline High Dose", +#' endpoint_filter = NA_character_, +#' endpoint_group_filter = NA_character_, +#' endpoint_group_metadata = list(), +#' custom_pop_filter = NA_character_, +#' period_var = NA_character_, +#' period_value = NA_character_, +#' key_analysis_data = "a" +#' ) +#' +#' # Prepare data container +#' adcm_data <- as.data.table(pharmaverseadam::adcm) +#' adcm_data[, INDEX_ := .I] +#' +#' analysis_data_container <- data.table( +#' dat = list(adcm_data), +#' key_analysis_data = "a" +#' ) +#' setkey(analysis_data_container, key_analysis_data) +#' setkey(ep, key_analysis_data) +#' +#' # Create strata-level criterion: keep strata if >= 3 subjects +#' min_subgroup_size <- function(dat, event_index, stratify_by, strata_var, ...) { +#' # This is a simplified example; actual implementation evaluates actual data +#' length(event_index) >= 3 +#' } +#' +#' fn_map <- data.table( +#' endpoint_spec_id = 1L, +#' fn_type = "crit_by_strata_by_trt", +#' fn_callable = list(min_subgroup_size), +#' fn_name = "min_subgroup_size" +#' ) +#' +#' # Apply strata criteria +#' ep_strata <- apply_criterion_by_strata( +#' ep = ep, +#' analysis_data_container = analysis_data_container, +#' fn_map = fn_map, +#' type = "by_strata_by_trt" +#' ) +#' +#' # Result: multiple rows per endpoint (one per stratum) +#' ep_strata[, .(endpoint_id, strata_var, crit_accept_by_strata_by_trt)] apply_criterion_by_strata <- function(ep, analysis_data_container, diff --git a/R/check_duplicate_functions.R b/R/check_duplicate_functions.R index b2e1ee9..7bea580 100644 --- a/R/check_duplicate_functions.R +++ b/R/check_duplicate_functions.R @@ -1,11 +1,51 @@ #' Check for duplicate function definitions #' +#' Scans a directory for R files and identifies any function definitions that +#' appear multiple times (by name). This is useful in endpoint analysis pipelines +#' to prevent accidental function redefinitions that could cause unexpected +#' behavior. The check is case-sensitive (i.e., `func_name` and `Func_name` are +#' considered distinct). +#' #' @param dir The directory where the custom functions are defined #' -#' @return run for side-effects, if multiple functions are encountered it throws -#' an error +#' @return Run for side-effects. Returns `NULL` invisibly if no duplicates are +#' found. If duplicate functions are detected, an error is thrown with details +#' about which functions are duplicated. +#' #' @export #' +#' @examples +#' \dontrun{ +#' # Create a temporary directory for demonstration +#' tmp_dir <- tempdir() +#' +#' # Valid case: no duplicate function names +#' writeLines(" +#' compute_n_subjects <- function(dat, ...) { +#' nrow(dat) +#' } +#' ", file.path(tmp_dir, "stat_functions_1.R")) +#' +#' writeLines(" +#' compute_event_rate <- function(dat, ...) { +#' sum(dat$is_event) / nrow(dat) +#' } +#' ", file.path(tmp_dir, "stat_functions_2.R")) +#' +#' # Check passes - no duplicates found +#' check_duplicate_functions(tmp_dir) +#' +#' # Invalid case: duplicate function names cause error +#' writeLines(" +#' compute_n_subjects <- function(dat, ...) { +#' sum(!is.na(dat$id)) # Different implementation +#' } +#' ", file.path(tmp_dir, "stat_functions_2.R")) +#' +#' # This will error with message about "compute_n_subjects" being duplicated +#' check_duplicate_functions(tmp_dir) +#' } +#' check_duplicate_functions <- function(dir) { if (!dir.exists(dir)) { stop(paste0("Directory ", dir, " does not exist")) diff --git a/R/evaluate_criteria.R b/R/evaluate_criteria.R index caeae24..4800fb5 100644 --- a/R/evaluate_criteria.R +++ b/R/evaluate_criteria.R @@ -11,6 +11,34 @@ #' whether to keep the endpoint/strata or not. #' @export #' +#' @examples +#' \dontrun{ +#' library(data.table) +#' library(pharmaverseadam) +#' +#' # Define endpoints +#' endpoints <- data.table( +#' endpoint_spec_id = 1:2, +#' endpoint_label = c("Safety Events", "Efficacy Events"), +#' crit_endpoint = list(NULL, NULL), +#' key_analysis_data = "a" +#' ) +#' +#' # Prepare ADAM data in list format +#' adam_set <- list( +#' adcm = as.data.table(pharmaverseadam::adcm), +#' adae = as.data.table(pharmaverseadam::adae) +#' ) +#' +#' # evaluate_criteria is designed to run within a {targets} pipeline where +#' # criterion_wrapper is provided by the pipeline environment +#' result <- evaluate_criteria( +#' endpoints = endpoints, +#' adam_set = adam_set, +#' criteria_type = "endpoint" +#' ) +#' result[, .(endpoint_label, keep_endpoint)] +#' } evaluate_criteria <- function(endpoints, adam_set, diff --git a/R/expand_endpoints.R b/R/expand_endpoints.R index 2e2fb01..56ce7d8 100644 --- a/R/expand_endpoints.R +++ b/R/expand_endpoints.R @@ -15,6 +15,45 @@ #' @return A `data.table` where each row corresponds to an expanded endpoint #' definition #' @export +#' +#' @examples +#' library(data.table) +#' library(pharmaverseadam) +#' +#' # Prepare ADCM data +#' adcm <- as.data.table(pharmaverseadam::adcm)[!is.na(CMCLAS)] +#' cmclas_vals <- unique(adcm$CMCLAS) +#' +#' # Create endpoint definition expanding by therapeutic class +#' endpoint_def <- data.table( +#' endpoint_spec_id = 1L, +#' endpoint_label = "Concomitant Medications: ", +#' pop_var = "SAFFL", +#' pop_value = "Y", +#' period_var = NA_character_, +#' period_value = NA_character_, +#' treatment_var = "TRT01A", +#' treatment_refval = "Xanomeline High Dose", +#' endpoint_filter = NA_character_, +#' custom_pop_filter = NA_character_, +#' stratify_by = list(list()), +#' group_by = list(list(CMCLAS = cmclas_vals)), +#' key_analysis_data = "a" +#' ) +#' +#' # Create analysis data container +#' analysis_data <- data.table(dat = list(adcm), key_analysis_data = "a") +#' setkey(analysis_data, key_analysis_data) +#' setkey(endpoint_def, key_analysis_data) +#' +#' # Expand: 1 row becomes one row per unique CMCLAS value +#' expanded_ep <- expand_over_endpoints( +#' ep = endpoint_def, +#' analysis_data_container = analysis_data +#' ) +#' nrow(expanded_ep) +#' expanded_ep[, .(endpoint_id, endpoint_label, endpoint_group_filter)] +#' expand_over_endpoints <- function(ep, analysis_data_container) { expand_specification <- dat <- @@ -38,7 +77,7 @@ expand_over_endpoints <- function(ep, analysis_data_container) { } else { ep_exp <- ep_with_data[, .SD, .SDcols = setdiff(names(ep_with_data), "expand_specification")] ep_exp[, endpoint_group_filter := NA] - ep_exp[, endpoint_group_metadata := list()] + ep_exp[["endpoint_group_metadata"]] <- vector("list", nrow(ep_exp)) } ep_exp[, endpoint_id := add_ep_id(.SD, .BY), by = endpoint_spec_id] @@ -128,6 +167,25 @@ expand_over_endpoints <- function(ep, analysis_data_container) { #' consists only of `NA` values, the function returns `NA`. #' @export #' +#' @examples +#' library(data.table) +#' library(pharmaverseadam) +#' +#' # Load sample data and add INDEX_ column +#' adcm <- as.data.table(pharmaverseadam::adcm) +#' adcm <- adcm[!is.na(CMCLAS)][1:50] # Subset for brevity +#' +#' # Define grouping: expand endpoint by therapeutic class +#' group_by <- list(CMCLAS = unique(adcm$CMCLAS)) +#' +#' # Generate expanded endpoint specifications +#' expanded <- define_expanded_ep(x = adcm, group_by = group_by) +#' +#' # View structure: each row = one group level +#' expanded +#' # Note: endpoint_group_metadata contains the group values +#' # endpoint_group_filter contains the filter string (e.g., 'CMCLAS == "NERVOUS SYSTEM"') +#' define_expanded_ep <- function(x, group_by, forced_group_levels = NULL, col_prefix = "endpoint_group") { if (!is.list(group_by) || all(is.na(group_by))) { return(NA) diff --git a/R/global.R b/R/global.R index 7a6a9f7..90c327c 100644 --- a/R/global.R +++ b/R/global.R @@ -30,7 +30,6 @@ utils::globalVariables( "SEX", "adam", "adam_fn", - "AGEGR2", "endpoint", "endpoint_id", "endpoint_label", @@ -45,23 +44,3 @@ utils::globalVariables( "value" ) ) - - - -#' @noRd -helper_calls_to_imports <- function(){ - # Some packages will be needed when the user runs the pipeline, so we want - # those packages "Imported" in the DESCRIPTION file, so the user does not have - # any additional steps to install them after installing chef. However, the - # code for this is stored in the template files, and for some reason, R CMD - # check does not see theses files, so it gives a warning that we have - # dependencies listed in the DESCRIPTION file that are not used in the - # package. These notes are not allowed in our CI/CD checks, so we use this - # function to make just one call to each of those packages. - - qs::starnames[1, 1] - future::availableCores - future.callr::callr - tarchetypes::walk_ast - targets::tar_warning -} diff --git a/R/handle_mk_fn.R b/R/handle_mk_fn.R index 8fdd3f0..f5e65c6 100644 --- a/R/handle_mk_fn.R +++ b/R/handle_mk_fn.R @@ -1,10 +1,11 @@ #' Handle creation of endpoint def function #' +#' @param fn fn in list format #' @param pipeline_id The pipeline ID -#' @param fn_list fn in list format +#' @param r_functions_dir The directory where the custom R scripts go #' @param type Type of mk_* function: mk_endpoint_def (default), mk_criterion, #' or mk_adam. -#' @param r_functions_dir The directory where the custom R scripts go +#' @param env Environment #' #' @keywords internal handle_mk_fn <- diff --git a/R/mk_userdef_fn_dt.R b/R/mk_userdef_fn_dt.R index 220c755..1b9503e 100644 --- a/R/mk_userdef_fn_dt.R +++ b/R/mk_userdef_fn_dt.R @@ -27,6 +27,35 @@ #' #' @export #' +#' @examples +#' library(data.table) +#' +#' # Create function table with user-defined functions +#' # (typically from endpoint definitions via unnest_endpoint_functions) +#' endpoint_fns <- data.table( +#' endpoint_spec_id = c(1, 1, 2, 2), +#' fn_type = c("data_prepare", "stat_by_strata_by_trt", +#' "data_prepare", "stat_by_strata_by_trt"), +#' fn = list( +#' quote(function(study_metadata) NULL), +#' quote(function(dat, ...) nrow(dat)), +#' quote(function(study_metadata) NULL), +#' quote(function(dat, ...) nrow(dat)) +#' ), +#' fn_name = c("prep_fn", "n_subj", "prep_fn", "n_subj"), +#' fn_hash = c("hash1", "hash2", "hash1", "hash2") +#' ) +#' +#' # Parse functions into callable format +#' fn_table <- mk_userdef_fn_dt(endpoint_fns) +#' +#' # Result: unique functions by hash, deduplicated +#' nrow(fn_table) # 2 (hash1, hash2) +#' fn_table[, .(fn_type, fn_name)] +#' +#' # fn_callable column contains executable functions +#' fn_table$fn_callable[[1]] # Callable function +#' mk_userdef_fn_dt <- function(x, env=parent.frame()){ fn_type <- fn <- diff --git a/R/try_and_validate.R b/R/try_and_validate.R index 3eaea3c..1e27f5b 100644 --- a/R/try_and_validate.R +++ b/R/try_and_validate.R @@ -233,12 +233,54 @@ validate_crit_output <- function(output) { #' #' @description Validates the output of statistical functions to ensure it #' conforms to expected structure. The expected structure includes specific -#' column names and non-empty results. +#' column names (`label`, `description`, `qualifiers`, `value`) and at least one row. +#' This validator is typically used with [try_and_validate()] to ensure +#' statistical functions return correctly formatted results. #' #' @param output The output from a statistical function. #' -#' @return An error message if validation fails, otherwise NA. +#' @return An error message (character) if validation fails, otherwise `NA_character_`. +#' Use with [try_and_validate()] to automatically handle validation failures. +#' #' @export +#' +#' @examples +#' library(data.table) +#' +#' # Valid statistical output - correctly formatted +#' valid_stats <- data.table( +#' label = c("N", "n_events", "Rate"), +#' description = c( +#' "Number of subjects", +#' "Number with adverse events", +#' "Percentage with events" +#' ), +#' qualifiers = NA_character_, +#' value = c(250L, 45L, 18.0) +#' ) +#' +#' # Validation passes +#' validate_stat_output(valid_stats) # Returns NA_character_ +#' +#' # Invalid: wrong class (must be data.table) +#' invalid_vector <- c(N = 250, events = 45) +#' validate_stat_output(invalid_vector) # Returns error message +#' +#' # Invalid: missing required columns +#' incomplete_stats <- data.table( +#' label = c("N", "Rate"), +#' value = c(250L, 18.0) +#' ) +#' validate_stat_output(incomplete_stats) # Shows missing columns +#' +#' # Invalid: empty result (0 rows) +#' empty_stats <- data.table( +#' label = character(), +#' description = character(), +#' qualifiers = character(), +#' value = numeric() +#' ) +#' validate_stat_output(empty_stats) # Error: 0 rows returned validate_stat_output <- function(output) { # if not a DT return early if (!data.table::is.data.table(output)) { diff --git a/R/use_chef.R b/R/use_chef.R index 0e25817..9349d4e 100644 --- a/R/use_chef.R +++ b/R/use_chef.R @@ -10,7 +10,7 @@ #' #' @param pipeline_dir Character string ending with `/`. The directory where the #' targets pipeline scripts are to be stored. Keep in mind, wherever these -#' pipeline scripts are stored, the {targets} cache files will also be stored +#' pipeline scripts are stored, the targets cache files will also be stored #' (these cache files will not be under version control and thus only exist on #' your "machine"). #' @param r_functions_dir Character string ending with `/`. The directory where @@ -34,7 +34,7 @@ #' written, set `mk_adam_fn = NA`. #' @param mk_criteria_fn List of functions used for making the criteria for #' endpoint/analysis inclusion. This is useful if you want to supply already -#' existing functions that are not part of the {chefcriterion} package. This +#' existing functions that are not part of the chefcriterion package. This #' must be a list, and each element must be an unquoted function name (e.g. #' `my_criteria_fn`). The functions have to be available from the global #' environment (i.e if you type `my_criteria_fn()` into the console, it would @@ -124,7 +124,7 @@ use_chef <- stage_pipeline(pipeline_id = pipeline_id) } -#' Run a {targets} pipeline +#' Run a targets pipeline #' #' @description A wrapper for targets::tar_make() that ensures the correct #' pipeline is run, and the correct cache location is used for that pipeline. @@ -150,9 +150,9 @@ run_pipeline <- function(pipeline_id = NULL, targets::tar_make() } -#' Stage a {targets} pipeline so that you can work interactively with it +#' Stage a targets pipeline so that you can work interactively with it #' -#' @description To interact with a {targets} pipeline (e.g., run the pipeline, +#' @description To interact with a targets pipeline (e.g., run the pipeline, #' load the completed targets from cache into memory), targets needs to know #' which pipeline you want to work with. `This function` is a thin wrapper #' apound a Sys.setenv() call, and depends on the _targets.yaml file being set diff --git a/R/utils.R b/R/utils.R index f373026..ed83e06 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,11 +1,39 @@ #' Make list of lists #' +#' Creates a nested list structure where the input elements are wrapped in +#' an outer list. This is useful for organizing function specifications in +#' analysis pipelines, particularly when using the targets framework. +#' #' @param ... Elements to be included in the nested list #' @return A list object containing a list where each element is defined by #' `...` #' @importFrom magrittr %>% #' @import targets #' @export +#' @examples +#' # Create a nested list structure for endpoint statistics configurations +#' # Common use case: grouping multiple statistical functions by analysis level +#' stats_config <- llist( +#' n_subjects = function(dat, ...) { +#' data.table::data.table( +#' label = "N", +#' description = "Number of subjects", +#' qualifiers = NA_character_, +#' value = nrow(dat) +#' ) +#' }, +#' mean_value = function(dat, var, ...) { +#' data.table::data.table( +#' label = "Mean", +#' description = paste("Mean of", var), +#' qualifiers = NA_character_, +#' value = mean(dat[[var]], na.rm = TRUE) +#' ) +#' } +#' ) +#' +#' # The structure enables targets to handle multiple analysis functions +#' str(stats_config) llist <- function(...) { list(list(...)) } diff --git a/inst/templates/template-pipeline.R b/inst/templates/template-pipeline.R index 7b106c5..e3b996d 100644 --- a/inst/templates/template-pipeline.R +++ b/inst/templates/template-pipeline.R @@ -6,7 +6,7 @@ chef::check_duplicate_functions(fun_dir) ## Load your R files lapply(list.files(normalizePath(fun_dir), full.names = TRUE), source) -targets::tar_option_set(packages = c("chef", "data.table"), format = "qs") +targets::tar_option_set(packages = c("chef", "data.table"), format = "rds") list( targets::tar_target(ep, mk_endpoint_def()), diff --git a/man/add_event_index.Rd b/man/add_event_index.Rd index 5a185cb..6546faf 100644 --- a/man/add_event_index.Rd +++ b/man/add_event_index.Rd @@ -29,3 +29,38 @@ pairs and singleton conditions that define specific events of interest within the dataset. This index can be used to identify events in the user-supplied criteria and/or statistical functions } +\examples{ +library(data.table) +library(pharmaverseadam) + +# Prepare clinical data with INDEX_ column +adcm_data <- as.data.table(pharmaverseadam::adcm) +adcm_data[, INDEX_ := .I] + +analysis_data_container <- data.table( + dat = list(adcm_data), + key_analysis_data = "a" +) +setkey(analysis_data_container, key_analysis_data) + +# Create endpoint with specific filters +ep <- data.table( + endpoint_id = 1L, + pop_var = "SAFFL", + pop_value = "Y", + period_var = NA_character_, + period_value = NA_character_, + endpoint_filter = NA_character_, + endpoint_group_filter = NA_character_, + custom_pop_filter = NA_character_, + key_analysis_data = "a" +) +setkey(ep, key_analysis_data) + +# Add event index: identifies which rows match endpoint criteria +ep_with_index <- add_event_index(ep, analysis_data_container) + +# event_index contains row numbers from adcm_data matching the criteria +str(ep_with_index$event_index) # integer vector of INDEX_ values +length(ep_with_index$event_index[[1]]) # e.g., 47 events match SAFFL="Y" +} diff --git a/man/add_id.Rd b/man/add_id.Rd index 9740c1c..6ca9f24 100644 --- a/man/add_id.Rd +++ b/man/add_id.Rd @@ -10,8 +10,29 @@ add_id(ep) \item{ep}{A \code{data.table} containing endpoint definitions.} } \value{ -data.table +data.table with an additional \code{endpoint_spec_id} column containing +sequential integers starting from 1. } \description{ -Add ID to user-defined endpoint groups +Adds a sequential identifier (\code{endpoint_spec_id}) to each row of an endpoint +definition table. These IDs track endpoint specifications through the analysis +pipeline and enable mapping of results back to original endpoint definitions. +} +\examples{ +library(data.table) + +# Typical endpoint definition table from chef workflows +endpoints <- data.table( + endpoint_label = c("Primary Efficacy", "Safety", "Tolerability"), + analysis_type = c("efficacy", "adverse_event", "adverse_event"), + custom_pop_filter = c("AGE >= 18", "SAFFL=='Y'", "SAFFL=='Y'") +) + +# Add sequential IDs for tracking through analysis pipeline +endpoints_with_ids <- add_id(endpoints) + +endpoints_with_ids + +# The endpoint_spec_id column now uniquely identifies each endpoint +# and can be used to map statistical results back to endpoint definitions } diff --git a/man/apply_criterion_by_strata.Rd b/man/apply_criterion_by_strata.Rd index 91b56cc..5766a6c 100644 --- a/man/apply_criterion_by_strata.Rd +++ b/man/apply_criterion_by_strata.Rd @@ -33,3 +33,61 @@ functions to the endpoints data to determine eligibility for strata statistics. It adds a logical column to the data indicating whether each row meet the criteria. } +\examples{ +library(data.table) +library(pharmaverseadam) + +# Create endpoint with stratification that already passed endpoint criteria +ep <- data.table( + endpoint_id = "1-0001", + endpoint_spec_id = 1L, + crit_accept_endpoint = TRUE, + stratify_by = list(c("SEX")), + strata_var = c("M", "F"), # Two strata plus TOTAL + event_index = list(1:20), + treatment_var = "TRT01A", + treatment_refval = "Xanomeline High Dose", + endpoint_filter = NA_character_, + endpoint_group_filter = NA_character_, + endpoint_group_metadata = list(), + custom_pop_filter = NA_character_, + period_var = NA_character_, + period_value = NA_character_, + key_analysis_data = "a" +) + +# Prepare data container +adcm_data <- as.data.table(pharmaverseadam::adcm) +adcm_data[, INDEX_ := .I] + +analysis_data_container <- data.table( + dat = list(adcm_data), + key_analysis_data = "a" +) +setkey(analysis_data_container, key_analysis_data) +setkey(ep, key_analysis_data) + +# Create strata-level criterion: keep strata if >= 3 subjects +min_subgroup_size <- function(dat, event_index, stratify_by, strata_var, ...) { + # This is a simplified example; actual implementation evaluates actual data + length(event_index) >= 3 +} + +fn_map <- data.table( + endpoint_spec_id = 1L, + fn_type = "crit_by_strata_by_trt", + fn_callable = list(min_subgroup_size), + fn_name = "min_subgroup_size" +) + +# Apply strata criteria +ep_strata <- apply_criterion_by_strata( + ep = ep, + analysis_data_container = analysis_data_container, + fn_map = fn_map, + type = "by_strata_by_trt" +) + +# Result: multiple rows per endpoint (one per stratum) +ep_strata[, .(endpoint_id, strata_var, crit_accept_by_strata_by_trt)] +} diff --git a/man/apply_criterion_endpoint.Rd b/man/apply_criterion_endpoint.Rd index c76b5c0..6946f33 100644 --- a/man/apply_criterion_endpoint.Rd +++ b/man/apply_criterion_endpoint.Rd @@ -24,3 +24,54 @@ Applies the user-supplied criteria functions to each endpoint to determine eligibility based on the criteria. The result is a logical column added to the data indicating whether each endpoint meets the criteria. } +\examples{ +library(data.table) +library(pharmaverseadam) + +# Create a simple criterion function: keep endpoint if n events >= 5 +min_events_fn <- function(dat, event_index, ...) { + length(event_index) >= 5 +} + +# Prepare endpoint with event index +adcm_data <- as.data.table(pharmaverseadam::adcm) +adcm_data[, INDEX_ := .I] + +analysis_data_container <- data.table( + dat = list(adcm_data), + key_analysis_data = "a" +) +setkey(analysis_data_container, key_analysis_data) + +ep <- data.table( + endpoint_id = "1-0001", + endpoint_spec_id = 1L, + pop_var = "SAFFL", + pop_value = "Y", + period_var = NA_character_, + period_value = NA_character_, + endpoint_filter = NA_character_, + endpoint_group_filter = NA_character_, + custom_pop_filter = NA_character_, + stratify_by = list(), + treatment_var = "TRT01A", + treatment_refval = "Xanomeline High Dose", + event_index = list(1:20), # Simulated event indices + key_analysis_data = "a" +) +setkey(ep, key_analysis_data) + +# Create function map linking endpoint to criterion +fn_map <- data.table( + endpoint_spec_id = 1L, + fn_type = "crit_endpoint", + fn_callable = list(min_events_fn), + fn_name = "min_events" +) + +# Apply endpoint criterion +ep_filtered <- apply_criterion_endpoint(ep, analysis_data_container, fn_map) + +# Result: crit_accept_endpoint = TRUE (20 events >= 5) +ep_filtered[, .(endpoint_id, crit_accept_endpoint)] +} diff --git a/man/check_duplicate_functions.Rd b/man/check_duplicate_functions.Rd index 8bd80d4..fcb3cd7 100644 --- a/man/check_duplicate_functions.Rd +++ b/man/check_duplicate_functions.Rd @@ -10,9 +10,47 @@ check_duplicate_functions(dir) \item{dir}{The directory where the custom functions are defined} } \value{ -run for side-effects, if multiple functions are encountered it throws -an error +Run for side-effects. Returns \code{NULL} invisibly if no duplicates are +found. If duplicate functions are detected, an error is thrown with details +about which functions are duplicated. } \description{ -Check for duplicate function definitions +Scans a directory for R files and identifies any function definitions that +appear multiple times (by name). This is useful in endpoint analysis pipelines +to prevent accidental function redefinitions that could cause unexpected +behavior. The check is case-sensitive (i.e., \code{func_name} and \code{Func_name} are +considered distinct). +} +\examples{ +\dontrun{ + # Create a temporary directory for demonstration + tmp_dir <- tempdir() + + # Valid case: no duplicate function names + writeLines(" + compute_n_subjects <- function(dat, ...) { + nrow(dat) + } + ", file.path(tmp_dir, "stat_functions_1.R")) + + writeLines(" + compute_event_rate <- function(dat, ...) { + sum(dat$is_event) / nrow(dat) + } + ", file.path(tmp_dir, "stat_functions_2.R")) + + # Check passes - no duplicates found + check_duplicate_functions(tmp_dir) + + # Invalid case: duplicate function names cause error + writeLines(" + compute_n_subjects <- function(dat, ...) { + sum(!is.na(dat$id)) # Different implementation + } + ", file.path(tmp_dir, "stat_functions_2.R")) + + # This will error with message about "compute_n_subjects" being duplicated + check_duplicate_functions(tmp_dir) +} + } diff --git a/man/define_expanded_ep.Rd b/man/define_expanded_ep.Rd index a6e63cf..18a3913 100644 --- a/man/define_expanded_ep.Rd +++ b/man/define_expanded_ep.Rd @@ -36,3 +36,23 @@ filter strings for each endpoint group, which are used to subset the data accordingly. The function ensures that each endpoint group has the necessary information for further analysis. } +\examples{ +library(data.table) +library(pharmaverseadam) + +# Load sample data and add INDEX_ column +adcm <- as.data.table(pharmaverseadam::adcm) +adcm <- adcm[!is.na(CMCLAS)][1:50] # Subset for brevity + +# Define grouping: expand endpoint by therapeutic class +group_by <- list(CMCLAS = unique(adcm$CMCLAS)) + +# Generate expanded endpoint specifications +expanded <- define_expanded_ep(x = adcm, group_by = group_by) + +# View structure: each row = one group level +expanded +# Note: endpoint_group_metadata contains the group values +# endpoint_group_filter contains the filter string (e.g., 'CMCLAS == "NERVOUS SYSTEM"') + +} diff --git a/man/evaluate_criteria.Rd b/man/evaluate_criteria.Rd index 27af23d..8393399 100644 --- a/man/evaluate_criteria.Rd +++ b/man/evaluate_criteria.Rd @@ -27,3 +27,32 @@ whether to keep the endpoint/strata or not. \description{ Evaluate criteria for inclusion of endpoints or endpoint strata } +\examples{ +\dontrun{ +library(data.table) +library(pharmaverseadam) + +# Define endpoints +endpoints <- data.table( + endpoint_spec_id = 1:2, + endpoint_label = c("Safety Events", "Efficacy Events"), + crit_endpoint = list(NULL, NULL), + key_analysis_data = "a" +) + +# Prepare ADAM data in list format +adam_set <- list( + adcm = as.data.table(pharmaverseadam::adcm), + adae = as.data.table(pharmaverseadam::adae) +) + +# evaluate_criteria is designed to run within a {targets} pipeline where +# criterion_wrapper is provided by the pipeline environment +result <- evaluate_criteria( + endpoints = endpoints, + adam_set = adam_set, + criteria_type = "endpoint" +) +result[, .(endpoint_label, keep_endpoint)] +} +} diff --git a/man/expand_over_endpoints.Rd b/man/expand_over_endpoints.Rd index b22bff4..8c29e95 100644 --- a/man/expand_over_endpoints.Rd +++ b/man/expand_over_endpoints.Rd @@ -26,3 +26,42 @@ a mapping table that links these definitions to user-defined functions, which are then applied to the pre-processed ADaM datasets to create the expanded endpoints. } +\examples{ +library(data.table) +library(pharmaverseadam) + +# Prepare ADCM data +adcm <- as.data.table(pharmaverseadam::adcm)[!is.na(CMCLAS)] +cmclas_vals <- unique(adcm$CMCLAS) + +# Create endpoint definition expanding by therapeutic class +endpoint_def <- data.table( + endpoint_spec_id = 1L, + endpoint_label = "Concomitant Medications: ", + pop_var = "SAFFL", + pop_value = "Y", + period_var = NA_character_, + period_value = NA_character_, + treatment_var = "TRT01A", + treatment_refval = "Xanomeline High Dose", + endpoint_filter = NA_character_, + custom_pop_filter = NA_character_, + stratify_by = list(list()), + group_by = list(list(CMCLAS = cmclas_vals)), + key_analysis_data = "a" +) + +# Create analysis data container +analysis_data <- data.table(dat = list(adcm), key_analysis_data = "a") +setkey(analysis_data, key_analysis_data) +setkey(endpoint_def, key_analysis_data) + +# Expand: 1 row becomes one row per unique CMCLAS value +expanded_ep <- expand_over_endpoints( + ep = endpoint_def, + analysis_data_container = analysis_data +) +nrow(expanded_ep) +expanded_ep[, .(endpoint_id, endpoint_label, endpoint_group_filter)] + +} diff --git a/man/handle_mk_fn.Rd b/man/handle_mk_fn.Rd index dc6c21a..b63730f 100644 --- a/man/handle_mk_fn.Rd +++ b/man/handle_mk_fn.Rd @@ -13,6 +13,8 @@ handle_mk_fn( ) } \arguments{ +\item{fn}{fn in list format} + \item{pipeline_id}{The pipeline ID} \item{r_functions_dir}{The directory where the custom R scripts go} @@ -20,7 +22,7 @@ handle_mk_fn( \item{type}{Type of mk_* function: mk_endpoint_def (default), mk_criterion, or mk_adam.} -\item{fn_list}{fn in list format} +\item{env}{Environment} } \description{ Handle creation of endpoint def function diff --git a/man/llist.Rd b/man/llist.Rd index 6c0031a..7b487a5 100644 --- a/man/llist.Rd +++ b/man/llist.Rd @@ -14,5 +14,32 @@ A list object containing a list where each element is defined by \code{...} } \description{ -Make list of lists +Creates a nested list structure where the input elements are wrapped in +an outer list. This is useful for organizing function specifications in +analysis pipelines, particularly when using the targets framework. +} +\examples{ +# Create a nested list structure for endpoint statistics configurations +# Common use case: grouping multiple statistical functions by analysis level +stats_config <- llist( + n_subjects = function(dat, ...) { + data.table::data.table( + label = "N", + description = "Number of subjects", + qualifiers = NA_character_, + value = nrow(dat) + ) + }, + mean_value = function(dat, var, ...) { + data.table::data.table( + label = "Mean", + description = paste("Mean of", var), + qualifiers = NA_character_, + value = mean(dat[[var]], na.rm = TRUE) + ) + } +) + +# The structure enables targets to handle multiple analysis functions +str(stats_config) } diff --git a/man/mk_userdef_fn_dt.Rd b/man/mk_userdef_fn_dt.Rd index 7d68aec..1b7dfba 100644 --- a/man/mk_userdef_fn_dt.Rd +++ b/man/mk_userdef_fn_dt.Rd @@ -39,3 +39,33 @@ function. The output is a list of \code{data.table} objects, where each information about each function, including a character representation and the callable function itself. } +\examples{ +library(data.table) + +# Create function table with user-defined functions +# (typically from endpoint definitions via unnest_endpoint_functions) +endpoint_fns <- data.table( + endpoint_spec_id = c(1, 1, 2, 2), + fn_type = c("data_prepare", "stat_by_strata_by_trt", + "data_prepare", "stat_by_strata_by_trt"), + fn = list( + quote(function(study_metadata) NULL), + quote(function(dat, ...) nrow(dat)), + quote(function(study_metadata) NULL), + quote(function(dat, ...) nrow(dat)) + ), + fn_name = c("prep_fn", "n_subj", "prep_fn", "n_subj"), + fn_hash = c("hash1", "hash2", "hash1", "hash2") +) + +# Parse functions into callable format +fn_table <- mk_userdef_fn_dt(endpoint_fns) + +# Result: unique functions by hash, deduplicated +nrow(fn_table) # 2 (hash1, hash2) +fn_table[, .(fn_type, fn_name)] + +# fn_callable column contains executable functions +fn_table$fn_callable[[1]] # Callable function + +} diff --git a/man/run_pipeline.Rd b/man/run_pipeline.Rd index 086208d..f10c8dd 100644 --- a/man/run_pipeline.Rd +++ b/man/run_pipeline.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/use_chef.R \name{run_pipeline} \alias{run_pipeline} -\title{Run a {targets} pipeline} +\title{Run a targets pipeline} \usage{ run_pipeline(pipeline_id = NULL, pipeline_name = NULL) } diff --git a/man/stage_pipeline.Rd b/man/stage_pipeline.Rd index 1bf55fd..aa42616 100644 --- a/man/stage_pipeline.Rd +++ b/man/stage_pipeline.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/use_chef.R \name{stage_pipeline} \alias{stage_pipeline} -\title{Stage a {targets} pipeline so that you can work interactively with it} +\title{Stage a targets pipeline so that you can work interactively with it} \usage{ stage_pipeline(pipeline_id = NULL, pipeline_name = NULL) } @@ -18,7 +18,7 @@ leave \code{pipeline_id} blank and enter your custom pipeline name here.} Nothing, run for side effects } \description{ -To interact with a {targets} pipeline (e.g., run the pipeline, +To interact with a targets pipeline (e.g., run the pipeline, load the completed targets from cache into memory), targets needs to know which pipeline you want to work with. \verb{This function} is a thin wrapper apound a Sys.setenv() call, and depends on the _targets.yaml file being set diff --git a/man/use_chef.Rd b/man/use_chef.Rd index 5ebf21b..e5e1550 100644 --- a/man/use_chef.Rd +++ b/man/use_chef.Rd @@ -17,7 +17,7 @@ use_chef( \arguments{ \item{pipeline_dir}{Character string ending with \code{/}. The directory where the targets pipeline scripts are to be stored. Keep in mind, wherever these -pipeline scripts are stored, the {targets} cache files will also be stored +pipeline scripts are stored, the targets cache files will also be stored (these cache files will not be under version control and thus only exist on your "machine").} @@ -46,7 +46,7 @@ written, set \code{mk_adam_fn = NA}.} \item{mk_criteria_fn}{List of functions used for making the criteria for endpoint/analysis inclusion. This is useful if you want to supply already -existing functions that are not part of the {chefcriterion} package. This +existing functions that are not part of the chefcriterion package. This must be a list, and each element must be an unquoted function name (e.g. \code{my_criteria_fn}). The functions have to be available from the global environment (i.e if you type \code{my_criteria_fn()} into the console, it would diff --git a/man/validate_stat_output.Rd b/man/validate_stat_output.Rd index 6abc6dd..e936741 100644 --- a/man/validate_stat_output.Rd +++ b/man/validate_stat_output.Rd @@ -10,10 +10,51 @@ validate_stat_output(output) \item{output}{The output from a statistical function.} } \value{ -An error message if validation fails, otherwise NA. +An error message (character) if validation fails, otherwise \code{NA_character_}. +Use with \code{\link[=try_and_validate]{try_and_validate()}} to automatically handle validation failures. } \description{ Validates the output of statistical functions to ensure it conforms to expected structure. The expected structure includes specific -column names and non-empty results. +column names (\code{label}, \code{description}, \code{qualifiers}, \code{value}) and at least one row. +This validator is typically used with \code{\link[=try_and_validate]{try_and_validate()}} to ensure +statistical functions return correctly formatted results. +} +\examples{ +library(data.table) + +# Valid statistical output - correctly formatted +valid_stats <- data.table( + label = c("N", "n_events", "Rate"), + description = c( + "Number of subjects", + "Number with adverse events", + "Percentage with events" + ), + qualifiers = NA_character_, + value = c(250L, 45L, 18.0) +) + +# Validation passes +validate_stat_output(valid_stats) # Returns NA_character_ + +# Invalid: wrong class (must be data.table) +invalid_vector <- c(N = 250, events = 45) +validate_stat_output(invalid_vector) # Returns error message + +# Invalid: missing required columns +incomplete_stats <- data.table( + label = c("N", "Rate"), + value = c(250L, 18.0) +) +validate_stat_output(incomplete_stats) # Shows missing columns + +# Invalid: empty result (0 rows) +empty_stats <- data.table( + label = character(), + description = character(), + qualifiers = character(), + value = numeric() +) +validate_stat_output(empty_stats) # Error: 0 rows returned } diff --git a/tests/testthat/helper-skip-on-devops.R b/tests/testthat/helper-skip-on-devops.R deleted file mode 100644 index 6cdce80..0000000 --- a/tests/testthat/helper-skip-on-devops.R +++ /dev/null @@ -1,6 +0,0 @@ -skip_on_devops <- function() { - if (!identical(Sys.getenv("ON_DEVOPS"), "TRUE")) { - return(invisible(TRUE)) - } - testthat::skip("On DevOps") -} diff --git a/tests/testthat/test-add_event_index.R b/tests/testthat/test-add_event_index.R index 9dd3461..9d23767 100644 --- a/tests/testthat/test-add_event_index.R +++ b/tests/testthat/test-add_event_index.R @@ -96,7 +96,7 @@ test_that("add_event_index works over multiple rows in ep with custom filter", { test_that("add_event_index works over expanded endpoints", { # SETUP ------------------------------------------------------------------- - testr::skip_on_devops() + testthat::skip_on_ci() ep <- mk_ep_0001_base( stratify_by = list(c("SEX")), data_prepare = mk_adcm, @@ -136,7 +136,7 @@ test_that("add_event_index works over expanded endpoints", { test_that("add_event_index works over expanded endpoints with endpoint filter", { # SETUP ------------------------------------------------------------------- - testr::skip_on_devops() + testthat::skip_on_ci() ep <- mk_ep_0001_base( stratify_by = list(c("SEX")), data_prepare = mk_adcm, diff --git a/tests/testthat/test-apply_stats.R b/tests/testthat/test-apply_stats.R index 571cbab..63c681e 100644 --- a/tests/testthat/test-apply_stats.R +++ b/tests/testthat/test-apply_stats.R @@ -1,7 +1,7 @@ test_that("base: stat_by_strata_by_trt", { # SETUP ------------------------------------------------------------------- - skip_on_devops() + testthat::skip_on_ci() ep <- mk_ep_0001_base( stratify_by = list(c("SEX")), data_prepare = mk_adcm, @@ -76,7 +76,7 @@ test_that("base: stat_by_strata_by_trt", { test_that("validate: by_strata_by_trt returns same value as manual calculation with period flag", { # SETUP ------------------------------------------------------------------- - skip_on_devops() + testthat::skip_on_ci() ep <- mk_ep_0001_base( stratify_by = list(c("SEX")), data_prepare = mk_adcm, @@ -157,7 +157,7 @@ test_that("validate: by_strata_by_trt returns same value as manual calculation w test_that("by_strata_by_trt returns same value as manual calculation without period flag", { # SETUP ------------------------------------------------------------------- - skip_on_devops() + testthat::skip_on_ci() ep <- mk_ep_0001_base( stratify_by = list(c("SEX")), data_prepare = mk_adcm, @@ -239,7 +239,7 @@ test_that("by_strata_by_trt returns same value as manual calculation without per test_that("validate: n_sub return correct value", { # SETUP ------------------------------------------------------------------- - skip_on_devops() + testthat::skip_on_ci() ep <- mk_ep_0001_base( stratify_by = list(c("SEX")), data_prepare = mk_adcm, @@ -317,7 +317,7 @@ test_that("validate: n_sub return correct value", { test_that("apply_stats stat_by_strata_across_trt", { # SETUP ------------------------------------------------------------------- - skip_on_devops() + testthat::skip_on_ci() ep <- mk_ep_0001_base( custom_pop_filter = "TRT01A %in% c('Placebo', 'Xanomeline High Dose')", @@ -396,7 +396,7 @@ test_that("apply_stats stat_by_strata_across_trt", { test_that("apply_stats stat_across_strata_across_trt when no across_strata_across_trt fn supplied", { # SETUP ------------------------------------------------------------------- - skip_on_devops() + testthat::skip_on_ci() ep <- mk_endpoint_str( study_metadata = list(), pop_var = "SAFFL", @@ -472,7 +472,7 @@ test_that("apply_stats stat_across_strata_across_trt when no across_strata_acros test_that("apply_stats: with all FALSE for criteria", { # SETUP ------------------------------------------------------------------- - skip_on_devops() + testthat::skip_on_ci() crit_false <- function(...) FALSE diff --git a/tests/testthat/test-check_duplicate_functions.R b/tests/testthat/test-check_duplicate_functions.R index af52495..b2e6ead 100644 --- a/tests/testthat/test-check_duplicate_functions.R +++ b/tests/testthat/test-check_duplicate_functions.R @@ -1,12 +1,16 @@ test_that("check_duplicate_functions handles empty directory correctly", { - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) + dir.create("R") expect_null(check_duplicate_functions("R/")) }) test_that( "check_duplicate_functions handles directory with no duplicate function names correctly", { - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) + dir.create("R") write("f1 <- function(){}", "R/tmp.R") write("f2 <- function(){}", "R/tmp.R", append = TRUE) expect_null(check_duplicate_functions(dir = "R/")) @@ -14,7 +18,9 @@ test_that( ) test_that("check_duplicate_functions correctly identifies duplicate function names", { - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) + dir.create("R") write("f1 <- function(){}", "R/tmp.R") write("f1 <- function(){}", "R/tmp.R", append = TRUE) expect_error( @@ -23,7 +29,8 @@ test_that("check_duplicate_functions correctly identifies duplicate function nam }) test_that("check_duplicate_functions handles non-existent directory correctly", { - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) expect_error( check_duplicate_functions("R_fun"), "Directory R_fun does not exist" @@ -31,21 +38,27 @@ test_that("check_duplicate_functions handles non-existent directory correctly", }) test_that("check_duplicate_functions handles directory with non-R files correctly", { - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) + dir.create("R") write("f1 <- function(){}", "R/tmp.R") write("f1 <- function(){}", "R/tmp.txt") expect_null(check_duplicate_functions("R")) }) test_that("check_duplicate_functions handles directory with R files but no function definitions correctly", { - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) + dir.create("R") write("f1 <- function(){}", "R/tmp.R") write("f1 <- 5", "R/tmp.R", append = TRUE) expect_null(check_duplicate_functions("R")) }) test_that("check_duplicate_functions correctly identifies all duplicate function names", { - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) + dir.create("R") write("f1 <- function(){}", "R/tmp.R") write("f1 <- function(){}", "R/tmp.R", append = TRUE) write("f2 <- function(){}", "R/tmp.R", append = TRUE) @@ -58,7 +71,9 @@ test_that("check_duplicate_functions correctly identifies all duplicate function }) test_that("check_duplicate_functions handles function definitions with different parameters but same name correctly", { - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) + dir.create("R") write("f1 <- function(x){x}", "R/tmp.R") write("f1 <- function(y){y}", "R/tmp.R", append = TRUE) expect_error( @@ -69,7 +84,9 @@ test_that("check_duplicate_functions handles function definitions with different test_that("check_duplicate_functions treats functions with the same name but different case as distinct", { - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) + dir.create("R") write("f1 <- function(x){x}", "R/tmp.R") write("F1 <- function(x){x}", "R/tmp.R", append = TRUE) expect_null(check_duplicate_functions("R")) diff --git a/tests/testthat/test-filter_db_data.R b/tests/testthat/test-filter_db_data.R index 2c05ac2..e4ee416 100644 --- a/tests/testthat/test-filter_db_data.R +++ b/tests/testthat/test-filter_db_data.R @@ -155,10 +155,20 @@ test_that("filter_db_data works with >1 row in ep dataset", { actual <- filter_db_data(ep, ep_fn_map, adam_db) # EXPECT ------------------------------------------------------------------ - expect_equal(actual$analysis_data_container$dat[[1]], adam[SAFFL == "Y" & - CMSEQ >= 60]) - expect_equal(actual$analysis_data_container$dat[[2]], adam[SAFFL == "Y" & - CMSEQ >= 75]) + # The container's row order is determined by digest() of an internal hash + # key, which can differ between platforms / digest versions. Match each + # expected dataset to whichever container row equals it. + actual_dat <- actual$analysis_data_container$dat + expect_length(actual_dat, 2) + match_60 <- which(vapply(actual_dat, + function(d) isTRUE(all.equal(d, adam[SAFFL == "Y" & CMSEQ >= 60])), + logical(1))) + match_75 <- which(vapply(actual_dat, + function(d) isTRUE(all.equal(d, adam[SAFFL == "Y" & CMSEQ >= 75])), + logical(1))) + expect_length(match_60, 1) + expect_length(match_75, 1) + expect_true(match_60 != match_75) }) diff --git a/tests/testthat/test-pipeline_manual.R b/tests/testthat/test-pipeline_manual.R index 66c9d3a..f9f8e1c 100644 --- a/tests/testthat/test-pipeline_manual.R +++ b/tests/testthat/test-pipeline_manual.R @@ -1,5 +1,5 @@ test_that("Manual pipeline works", { - testr::skip_on_devops() + testthat::skip_on_ci() crit_endpoint <- function(dat, event_index, diff --git a/tests/testthat/test-targets.R b/tests/testthat/test-targets.R index d7627f5..a77a65f 100644 --- a/tests/testthat/test-targets.R +++ b/tests/testthat/test-targets.R @@ -1,6 +1,8 @@ test_that("Base case: targets pipeline works", { # SETUP ------------------------------------------------------------------- - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) crit_endpoint <- function(...) { return(T) } @@ -71,7 +73,9 @@ test_that("Base case: targets pipeline works", { test_that("targets pipeline works no criteria fn and missing by_* functions", { # SETUP ------------------------------------------------------------------- - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) mk_ep_def <- function() { ep <- mk_endpoint_str( @@ -125,7 +129,9 @@ test_that("targets pipeline works no criteria fn and missing by_* functions", test_that("branching after prepare for stats step works", { # SETUP ------------------------------------------------------------------- - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) mk_ep_def <- function() { ep <- mk_endpoint_str( @@ -175,7 +181,9 @@ test_that("branching after prepare for stats step works", { test_that("ep_fn_map is always outdated", { # SETUP ------------------------------------------------------------------- - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) mk_ep_def <- function() { ep <- mk_endpoint_str( @@ -219,7 +227,9 @@ test_that("ep_fn_map is always outdated", { test_that("study_data responds to changes in source data", { # SETUP ------------------------------------------------------------------- - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) saveRDS(data.table(runif(10)), file = "tmp_data_obj.rds") mk_test_fn <- function(study_metadata) { readRDS("tmp_data_obj.rds") diff --git a/tests/testthat/test-try_and_validate.R b/tests/testthat/test-try_and_validate.R index 2ae5a11..583ce4d 100644 --- a/tests/testthat/test-try_and_validate.R +++ b/tests/testthat/test-try_and_validate.R @@ -151,10 +151,12 @@ test_that( ) test_that("test in a targets setting.", { - testr::skip_on_devops() + testthat::skip_on_ci() # SETUP ------------------------------------------------------------------- - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) crit_endpoint <- function(...) { return(T) } @@ -264,10 +266,12 @@ test_that("loaded packages are included.", { test_that("loaded packages are included - In targets setting", { - testr::skip_on_devops() + testthat::skip_on_ci() # SETUP ------------------------------------------------------------------- - testr::create_local_project() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) crit_endpoint <- function(...) { return(T) } diff --git a/tests/testthat/test-use_chef.R b/tests/testthat/test-use_chef.R index 9049c66..5b411f2 100644 --- a/tests/testthat/test-use_chef.R +++ b/tests/testthat/test-use_chef.R @@ -1,7 +1,9 @@ test_that("use_chef makes top-level dirs and files", { # SETUP ------------------------------------------------------------------- - testr::skip_on_devops() - testr::create_local_project() + testthat::skip_on_ci() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) # ACT --------------------------------------------------------------------- use_chef(pipeline_id = "001") @@ -14,8 +16,10 @@ test_that("use_chef makes top-level dirs and files", { test_that("use_chef makes top-level dirs and fils when in Rproj", { # SETUP ------------------------------------------------------------------- - testr::skip_on_devops() - testr::create_local_project(rstudio = TRUE) + testthat::skip_on_ci() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) # ACT --------------------------------------------------------------------- use_chef(pipeline_id = "001") @@ -30,8 +34,10 @@ test_that("use_chef makes top-level dirs and fils when in Rproj", { test_that("use_chef writes default R files", { # SETUP ------------------------------------------------------------------- - testr::skip_on_devops() - testr::create_local_project() + testthat::skip_on_ci() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) # ACT --------------------------------------------------------------------- use_chef(pipeline_id = "001") @@ -49,8 +55,10 @@ test_that("use_chef writes default R files", { test_that("use_chef writes ammnog crit functions", { # SETUP ------------------------------------------------------------------- - testr::skip_on_devops() - testr::create_local_project() + testthat::skip_on_ci() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) crit_endpoint <- function() { "check" } @@ -74,8 +82,10 @@ test_that("use_chef writes ammnog crit functions", { test_that("use_chef writes custom mk_endpoint_def fn, and uses standard name", { # SETUP ------------------------------------------------------------------- - testr::skip_on_devops() - testr::create_local_project() + testthat::skip_on_ci() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) mk_endpoint_custom <- function() { "check" } @@ -96,8 +106,10 @@ test_that("use_chef writes custom mk_endpoint_def fn, and uses standard name", { test_that("use_chef writes custom mk_adam fn", { # SETUP ------------------------------------------------------------------- - testr::skip_on_devops() - testr::create_local_project() + testthat::skip_on_ci() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) mk_adam_custom <- function() { "check" } @@ -120,8 +132,10 @@ test_that("use_chef writes custom mk_adam fn", { test_that("use_chef writes multiple mk_adam fn's", { # SETUP ------------------------------------------------------------------- - testr::skip_on_devops() - testr::create_local_project() + testthat::skip_on_ci() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) mk_adam_custom <- function() { "check" } @@ -155,8 +169,10 @@ test_that("use_chef writes multiple mk_adam fn's", { test_that("use_chef set-up in README works", { # SETUP ------------------------------------------------------------------- - testr::skip_on_devops() - testr::create_local_project() + testthat::skip_on_ci() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) mk_endpoint_definition <- function() { mk_endpoint_str( study_metadata = list(), @@ -192,8 +208,10 @@ test_that("use_chef set-up in README works", { }) test_that("use_chef with custom pipeline_dir names works", { # SETUP ------------------------------------------------------------------- - testr::skip_on_devops() - testr::create_local_project() + testthat::skip_on_ci() + tmp <- withr::local_tempdir() + withr::local_dir(tmp) + usethis::local_project(tmp, force = TRUE, setwd = FALSE, quiet = TRUE) mk_endpoint_definition <- function() { mk_endpoint_str( study_metadata = list(), diff --git a/tools/strip-remotes.R b/tools/strip-remotes.R new file mode 100644 index 0000000..9b512f9 --- /dev/null +++ b/tools/strip-remotes.R @@ -0,0 +1,68 @@ +#!/usr/bin/env Rscript +# Strip the `Remotes:` block from DESCRIPTION before building a CRAN tarball. +# +# Remotes is used during CI/development so dependent packages can pull +# unreleased versions of sibling packages from GitHub. CRAN rejects any +# package whose DESCRIPTION contains a Remotes field, so it must be removed +# from the tarball that is submitted. +# +# Usage (from the package root): +# Rscript tools/strip-remotes.R # rewrites DESCRIPTION in place +# Rscript tools/strip-remotes.R --build # also runs R CMD build on the cleaned source +# +# The --build flag produces _.tar.gz in the parent directory +# (R CMD build's default), then restores DESCRIPTION so the working tree +# is left as it was. + +args <- commandArgs(trailingOnly = TRUE) +do_build <- "--build" %in% args + +desc_path <- "DESCRIPTION" +if (!file.exists(desc_path)) { + stop("DESCRIPTION not found. Run this script from the package root.") +} + +original <- readLines(desc_path, warn = FALSE) + +# A DESCRIPTION field starts at column 1; continuation lines are indented. +# Drop the line matching `^Remotes:` and any continuation lines that follow. +field_start <- grepl("^[^[:space:]]", original) +remotes_idx <- which(grepl("^Remotes:", original)) + +if (length(remotes_idx) == 0) { + message("No Remotes: field found in DESCRIPTION; nothing to strip.") + cleaned <- original +} else { + drop <- integer() + for (i in remotes_idx) { + drop <- c(drop, i) + j <- i + 1L + while (j <= length(original) && !field_start[j]) { + drop <- c(drop, j) + j <- j + 1L + } + } + cleaned <- original[-drop] + message("Stripped Remotes block (", length(drop), " line(s)).") +} + +if (do_build) { + # Snapshot the original so we can restore it after R CMD build. + backup <- tempfile("DESCRIPTION-") + file.copy(desc_path, backup, overwrite = TRUE) + on.exit({ + file.copy(backup, desc_path, overwrite = TRUE) + unlink(backup) + message("Restored original DESCRIPTION.") + }, add = TRUE) + + writeLines(cleaned, desc_path) + status <- system2("R", c("CMD", "build", ".")) + if (status != 0) { + stop("R CMD build failed (exit ", status, ").") + } +} else { + writeLines(cleaned, desc_path) + message("DESCRIPTION rewritten in place. Re-run with --build to produce a tarball,", + " or restore from git when done submitting.") +}