vimc · pratikunterwegs · Jan 20, 2026 · Jan 22, 2026 · Jan 22, 2026 · Jan 22, 2026
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: vimcheck
 Title: Diagnostics for Vaccine Impact Modelling Consortium Burden and
     Impact Estimates
-Version: 0.0.1
+Version: 0.0.3
 Authors@R: c(
     person("Pratik", "Gupte", , "p.gupte24@imperial.ac.uk", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0001-5294-7819")),
@@ -38,6 +38,7 @@ Suggests:
     spelling,
     testthat (>= 3.0.0),
     tibble,
+    vdiffr,
     withr
 VignetteBuilder: 
     knitr

diff --git a/NAMESPACE b/NAMESPACE
@@ -10,10 +10,30 @@ export(plot_coverage_set)
 export(plot_fvp)
 export(plot_global_burden)
 export(plot_global_burden_decades)
+export(prep_plot_age)
+export(prep_plot_burden_decades)
+export(prep_plot_coverage_set)
+export(prep_plot_demography)
+export(prep_plot_fvp)
+export(prep_plot_global_burden)
 export(theme_vimc)
+export(theme_vimc_noxaxis)
 export(transform_coverage_fvps)
 export(validate_complete_incoming_files)
 export(validate_file_dict_template)
 export(validate_template_alignment)
 importFrom(dplyr,.data)
+importFrom(ggplot2,'%+replace%')
+importFrom(ggplot2,aes)
+importFrom(ggplot2,facet_grid)
+importFrom(ggplot2,facet_wrap)
+importFrom(ggplot2,geom_col)
+importFrom(ggplot2,geom_hline)
 importFrom(ggplot2,ggplot)
+importFrom(ggplot2,label_wrap_gen)
+importFrom(ggplot2,labeller)
+importFrom(ggplot2,labs)
+importFrom(ggplot2,scale_fill_distiller)
+importFrom(ggplot2,scale_x_continuous)
+importFrom(ggplot2,scale_y_continuous)
+importFrom(ggplot2,vars)
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,15 @@
+# vimcheck 0.0.3
+
+- Separated data-prep for plotting from plotting functions.
+
+- Added tests for plotting preparation and plotting functions.
+
+# vimcheck 0.0.2
+
+- Added initial data validation functions and some tests.
+
+- Added VIMC _ggplot2_ theme.
+
 # vimcheck 0.0.1
 
 - Added GHA workflows to notify of changed dependencies, auto-render `README.md` from `README.Rmd`, and update the copyright year annually.

diff --git a/R/burden_diagnositics.R b/R/burden_diagnositics.R
@@ -31,7 +31,7 @@
  template <- file.path(path_burden, "file_dictionary.csv")

  if (file.exists(template)) {
    # TODO: check that file_dictionary entries are acceptable?
    data_dict <- readr::read_csv(
      template,
      show_col_types = FALSE
@@ -76,7 +76,7 @@
     checkmate::assert_data_frame(
       sce,
       any.missing = TRUE, # allowing missing as contained in examples
-      min.cols = length(scenario_data_colnames),
+      min.cols = length(scenario_data_colnames)
     )
     checkmate::assert_names(
       colnames(sce),
@@ -170,10 +170,13 @@
     )
 
     if (!are_good_scefiles) {
+      # prevent linting as lintr cannot see usage inside glue::glue()
+      # nolint start
       extra_files <- setdiff(sce_files, scenario_filenames)
       n_extra_files <- length(extra_files)
       missing_files <- setdiff(scenario_filenames, sce_files)
       n_missing_files <- length(missing_files)
+      # nolint end
 
       cli::cli_abort(
         c(
@@ -186,7 +189,7 @@
           {cli::no(n_missing_files)} missing files{? /} \\
           {.file {basename(missing_files)}}",
           i = "Directory searched: {.file {path_burden}}"
-        ),
+        )
       )
     }
   } else {
@@ -244,13 +247,9 @@
 
   missing_grid_in_burden <- dplyr::setdiff(template_grid, burden_grid)
   extra_grid_in_burden <- dplyr::setdiff(burden_grid, template_grid)
-  burden_grid_matches_template <- all(
-    c(
-      nrow(missing_grid_in_burden),
-      nrow(extra_grid_in_burden)
-    ) ==
-      0L
-  )
+  burden_grid_matches_template <- nrow(missing_grid_in_burden) +
+    nrow(extra_grid_in_burden) ==
+    0L
 
   list(
     missing_cols_in_burden = missing_cols_in_burden,
@@ -286,24 +285,21 @@
   wpp,
   gender = c("Both", "Male", "Female")
 ) {
-  # TODO: input checks
   checkmate::assert_data_frame(burden_set)
   checkmate::assert_data_frame(wpp)
 
   gender <- rlang::arg_match(gender)
 
-  cols_to_select <- c("country", "year", "age", "cohort_size")
+  cols_to_select <- c("country", "year", "age", "cohort_size", "scenario")
   provided <- dplyr::select(
     burden_set,
     {{ cols_to_select }}
   )
   provided <- dplyr::mutate(
     provided,
-    provided = cohort_size
+    provided = .data$cohort_size
   )
 
-  # TODO: explain what expected is
-  # TODO: replace with a right-join?
   expected <- dplyr::filter(
     wpp,
     gender == {{ gender }}
@@ -315,9 +311,10 @@
     expected,
     {{ cols_to_select }}
   )
+
   expected <- dplyr::rename(
     expected,
-    expected = value
+    expected = value # nolint due to tidyselect conventions
   )
 
   # return left join
@@ -328,9 +325,9 @@
   )
   alignment <- dplyr::mutate(
     alignment,
-    difference = provided - expected,
-    abs_diff = abs(difference),
-    prop_diff = difference / expected
+    difference = .data$provided - .data$expected,
+    abs_diff = abs(.data$difference),
+    prop_diff = .data$difference / .data$expected
   )
 
   alignment
@@ -351,14 +348,15 @@
 #'
 #' @export
 basic_burden_sanity <- function(burden) {
   # TODO: expectations on burden
-  mes <- "Basic sanity check for burden estimates:"
+  mes_start <- "Basic sanity check for burden estimates:"
+  mes <- mes_start
 
   value_col <- "value"
   value <- burden[[value_col]]
 
-  if (is.numeric(burden$value)) {
-    if (anyNA(burden$value)) {
+  if (is.numeric(value)) {
+    if (anyNA(value)) {
       mes_any_missing <- glue::glue(
         "Warning: Burden estimates should not have missing values, but some \\
         values are missing. Fix missing values by converting to zeros!"
@@ -367,7 +365,7 @@
       mes <- c(mes, mes_any_missing)
     }
 
-    if (any(burden$value < 0, na.rm = TRUE)) {
+    if (any(value < 0, na.rm = TRUE)) {
       mes_any_negative <- glue::glue(
         "Warning: Burden estimates should all be positive or zero, but found \\
         some negative estimates!"
@@ -384,7 +382,7 @@
     mes <- c(mes, mes_not_numeric)
   }
 
-  if (length(mes) == 1L) {
+  if (mes == mes_start) {
     mes <- c(mes, "PASS.")
   }
 
@@ -411,13 +409,13 @@
 #'
 #' @export
 transform_coverage_fvps <- function(coverage, wpp) {
  # TODO: checks on coverage
  # TODO: checks on wpp

   cols_to_select <- c("age_from", "age_to", "gender")
   todo_list <- dplyr::select(
     coverage,
-    cols_to_select
+    {{ cols_to_select }}
   )
   todo_list <- dplyr::distinct(todo_list)
   todo_list <- dplyr::mutate(
@@ -425,31 +423,30 @@
    job = seq_along(.data$gender)
  )

  # TODO: THIS NEEDS TO BE CLEANED UP
   # TODO: clarify structure of `coverage` and mapping of gender to age
   pop_all <- list()
   for (i in seq_along(todo_list$age_from)) {
-    pop_all[[i]] <- wpp %>%
-      x <- dplyr::filter(
+    x <- dplyr::filter(
       wpp,
-      .data$age >= todo_list$age_from[i],
-      .data$age <= todo_list$age_to[i],
+      dplyr::between(.data$age, todo_list$age_from[i], todo_list$age_to[i]),
       .data$gender == todo_list$gender[i]
     )
-    x <- dplyr::group_by(x, .data$country, .data$year)
     x <- dplyr::summarise(
       x,
       target_wpp = sum(.data$value),
-      .groups = "drop"
+      .by = c("country", "year")
     )
     x <- dplyr::mutate(
       x,
       job = todo_list$job[i]
     )
+
+    pop_all[[i]] <- x
   }
   pop_all <- dplyr::bind_rows(pop_all)
 
  # TODO: add comments or explain in fn docs
  d <- dplyr::left_join(
    coverage,
    pop_all,
@@ -477,7 +474,7 @@
  d
 }

 # TODO: fill out fn docs
 #' @title
 #'
 #' @description
@@ -496,7 +493,7 @@
 #'
 #' @export
 impact_check <- function(burden, scenario_order) {
  # TODO: input checks
  scenario_cols <- c("scenario", "scenario_order")
  scenario_order <- dplyr::select(scenario_order, {{ scenario_cols }})

@@ -526,7 +523,7 @@
    values_from = "million"
  )

  # TODO: CLEAN THIS UP
  for (i in 2:nrow(scenario_order)) {
    for (j in 1:(i - 1)) {
      if (any(d[i + 1] > d[j + 1])) {

diff --git a/R/constants.R b/R/constants.R
@@ -21,3 +21,22 @@ scenario_data_colnames <- c(
   "scenario",
   "scenario_description"
 )
+
+#' @name constants
+burden_outcome_names <- c(
+  "cases",
+  "deaths",
+  "dalys",
+  "yll"
+)
+
+#' @name constants
+colnames_plot_demog_compare <- c(
+  "variable",
+  "scenario",
+  "year",
+  "age",
+  "country",
+  "value",
+  "value_millions"
+)
diff --git a/R/example_data.R b/R/example_data.R
@@ -24,7 +24,7 @@
 #' groups.
 #'
 #' @format ## `eg_burden_template`
-#' A data frame with 10,201 rows and 10 columns:
+#' A data frame with 10,201 rows and 11 columns:
 #' \describe{
 #'   \item{disease}{Disease name}
 #'   \item{year}{Year}
@@ -36,6 +36,7 @@
 #'   \item{deaths}{Deaths averted}.
 #'   \item{yll}{Years of life-loss averted}.
 #'   \item{cohort_size}{Population size of the country in a year}.
+#'   \itme{scenario}{Vaccination scenario.}
 #' }
 #'
 #' @keywords data