diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index bfc9f4d..48dbc76 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -1,27 +1,20 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples -# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: - push: - branches: [main, master] pull_request: - release: - types: [published] + branches: [main, dev] + push: + branches: [main] workflow_dispatch: -name: pkgdown.yaml - -permissions: read-all +name: pkgdown jobs: pkgdown: runs-on: ubuntu-latest - # Only restrict concurrency for non-PR jobs - concurrency: - group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} permissions: contents: write + steps: - uses: actions/checkout@v4 @@ -33,17 +26,56 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: any::pkgdown, local::. + extra-packages: any::pkgdown needs: website - - name: Build site - run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + - name: Generate sample reports shell: Rscript {0} + run: | + install.packages(c("devtools", "here")) + devtools::load_all() + dir.create(here::here("pkgdown", "assets"), recursive = TRUE) + library(gsm.kri) + library(gsm.core) - - name: Deploy to GitHub pages 🚀 - if: github.event_name != 'pull_request' - uses: JamesIves/github-pages-deploy-action@v4.5.0 - with: - clean: false - branch: gh-pages - folder: docs + lCharts <- gsm.kri::MakeCharts( + dfResults = gsm.core::reportingResults, + dfGroups = gsm.core::reportingGroups, + dfMetrics = gsm.core::reportingMetrics, + dfBounds = gsm.core::reportingBounds + ) + + gsm.kri::Report_KRI( + lCharts = lCharts, + dfResults = gsm.kri::FilterByLatestSnapshotDate(gsm.core::reportingResults), + dfGroups = gsm.core::reportingGroups, + dfMetrics = gsm.core::reportingMetrics, + strOutputDir = 
here::here("pkgdown", "assets"), + strOutputFile = "report_kri_site.html" + ) + + ## Country Report + lCharts_country <- gsm.kri::MakeCharts( + dfResults = gsm.core::reportingResults_country, + dfGroups = gsm.core::reportingGroups_country, + dfMetrics = gsm.core::reportingMetrics_country, + dfBounds = gsm.core::reportingBounds_country + ) + + gsm.kri::Report_KRI( + lCharts = lCharts_country, + dfResults = gsm.kri::FilterByLatestSnapshotDate(gsm.core::reportingResults_country), + dfGroups = gsm.core::reportingGroups_country, + dfMetrics = gsm.core::reportingMetrics_country, + strOutputDir = here::here("pkgdown", "assets"), + strOutputFile = "report_kri_country.html" + ) + + + - name: Deploy pkgdown + # Publish only on push / manual dispatch: deploy_to_branch() pushes the built site to gh-pages, so it must not run for pull_request builds. + if: github.event_name != 'pull_request' + run: | + git config --local user.email "actions@github.com" + git config --local user.name "GitHub Actions" + Rscript -e 'pkgdown::deploy_to_branch(install = TRUE)' diff --git a/DESCRIPTION b/DESCRIPTION index 08cf0ec..f86cf19 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: openrbqm Title: What the Package Does (One Line, Title Case) -Version: 0.0.0.9000 +Version: 0.0.1 Authors@R: c( person("Jeremy", "Wildfire", email = "jwildfire@gmail.com", role = c("aut", "cre")), person("Laura", "Maxwell", email = "lkmaxwell23@gmail.com", role = c("aut")) @@ -23,15 +23,17 @@ Imports: rstudioapi, utils Remotes: - gsm.core=Gilead-BioStats/gsm.core@main, + gsm.core=Gilead-BioStats/gsm.core@v1.1.0, gsm.app=Gilead-BioStats/gsm.app, - gsm.datasim=Gilead-BioStats/gsm.datasim@dev, - gsm.kri=Gilead-BioStats/gsm.kri@main, - gsm.mapping=Gilead-BioStats/gsm.mapping@main, - gsm.reporting=Gilead-BioStats/gsm.reporting@main, - gsm.qc=Gilead-BioStats/gsm.qc@main + gsm.datasim=Gilead-BioStats/gsm.datasim@v1.0.0, + gsm.kri=Gilead-BioStats/gsm.kri@v1.1.0, + gsm.mapping=Gilead-BioStats/gsm.mapping@v1.0.1, + gsm.reporting=Gilead-BioStats/gsm.reporting@v1.0.1, + gsm.qc=Gilead-BioStats/gsm.qc@v1.0.1 Suggests: knitr, - rmarkdown + rmarkdown, + DT, + gt VignetteBuilder: knitr URL: 
https://openrbqm.github.io/openrbqm/ diff --git a/R/attach.R b/R/attach.R index d87d774..17026f6 100644 --- a/R/attach.R +++ b/R/attach.R @@ -70,10 +70,6 @@ openrbqm_detach <- function() { #' List all packages imported by openrbqm #' -#' @export -#' -#' @examples -#' #openrbqm_packages() openrbqm_packages <- function() { # get all imports from openrbqm's package description file raw <- utils::packageDescription("openrbqm")$Imports diff --git a/README.md b/README.md index 2af151d..c2e9613 100644 --- a/README.md +++ b/README.md @@ -10,50 +10,4 @@ Collection of R Packages designed for Risk Based Quality Management 5. gsm.reporting 6. gsm.app 7. gsm.template -8. grail 9. gsm.datasim - - -## Requirements before `{openrbqm}` v1.0.0 release - -### Critical outstanding issues - -#### Overview - -- [ ] `{gsm.reporting}` repo needs to be created and reporting functions and workflows be migrated -- [ ] `{gsm.core}` stripped down after mapping, kri and reporting repo contents confirmed and with at least one release. 
- - [ ] all examples and most vignettes moved to `{openrbqm}` -- [ ] `{gsm.mapping}` v1.0.0 release candidate with stripped `{gsm}` dev version (or `{gsm.core}` v2.3.0-rc) as dependency -- [ ] `{gsm.kri}` v1.0.0 release candidate with `{gsm.mapping}` v1.0.0-rc and stripped `{gsm.core}` v2.3.0-rc as dependencies -- [ ] `{gsm.endpoints}` mapping edits - - [ ] direct from raw mapping yamls moved to `{gsm.mapping}` with more specific file names - - [ ] update dependencies appropriately - -#### Detailed outstanding issues - -- `{gsm.core}` issues - - [ ] [Feature: Migrate Mapping components to gsm.mapping](https://github.com/Gilead-BioStats/gsm/issues/1972) - - available on gsm branch `fix-1972v2` in this [PR](https://github.com/Gilead-BioStats/gsm/pull/2002) - - [ ] [Feature: Remove gsm.kri functions, workflows and tests](https://github.com/Gilead-BioStats/gsm/issues/2013) - - available on gsm branch `fix-2013` - - [ ] [Feature: Ensure all workflows are migrated to their respective packages](https://github.com/Gilead-BioStats/gsm/issues/2014) - - [ ] [Feature: Migrate reporting vignette to {gsm.reporting}](https://github.com/Gilead-BioStats/gsm/issues/2015) - - [ ] [Feature: Migrate vignettes to {openrbqm} package](https://github.com/Gilead-BioStats/gsm/issues/2016) - - [ ] [Feature: Migrate workflow/reporting qualification tests to {gsm.kri}](https://github.com/Gilead-BioStats/gsm/issues/2017) - - [ ] [QC: remove util- prefix in file names](https://github.com/Gilead-BioStats/gsm/issues/2026) -- `{gsm.mapping}` issues - - [x] [Write a vignette and template for how to request a new domain/variable](https://github.com/Gilead-BioStats/gsm.mapping/issues/4) - - [ ] [Write a vignette displaying all mapped dfs and variables ](https://github.com/Gilead-BioStats/gsm.mapping/issues/3) - - Begun, and merged into dev via PR #18, but needs some editing to fully address the issue -- `{gsm.kri}` issues - - [ ] [Migrate Vignettes and make pkgdown 
site](https://github.com/Gilead-BioStats/gsm.kri/issues/9) - - begun on gsm branch `fix-9` -- `{gsm.endpoints}` issues - - [ ] [Feature: Move rawplus mapping yamls to the gsm.mapping package](https://github.com/Gilead-BioStats/gsm.endpoints/issues/134) - - [ ] [Feature: Update names and meta information of workflows to be more specific](https://github.com/Gilead-BioStats/gsm.endpoints/issues/133) -- `{gsm.reporting}` issues - -### Nice to haves -- `{gsm.template}` issues -- `{gsm.datasim}` issues - - add `{gsm.endpoints}` domains to available data and vars. [(Issue here)](https://github.com/Gilead-BioStats/gsm.datasim/issues/34) diff --git a/_pkgdown.yml b/_pkgdown.yml index 9dfc68f..7e7c23a 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -7,4 +7,11 @@ navbar: structure: left: [articles, Reports, reference, news] right: [search, github] - + components: + Reports: + text: Sample Reports + menu: + - text: Site Report + href: report_kri_site.html + - text: Country Report + href: report_kri_country.html diff --git a/inst/examples/1_AdverseEventKRI.R b/inst/examples/1_AdverseEventKRI.R new file mode 100644 index 0000000..edd89ad --- /dev/null +++ b/inst/examples/1_AdverseEventKRI.R @@ -0,0 +1,55 @@ +#### Example 1.1 - Generate an Adverse Event Metric using the standard {gsm.core} workflow + +dfInput <- Input_Rate( + dfSubjects= gsm.core::lSource$Raw_SUBJ, + dfNumerator= gsm.core::lSource$Raw_AE, + dfDenominator = gsm.core::lSource$Raw_SUBJ, + strSubjectCol = "subjid", + strGroupCol = "siteid", + strNumeratorMethod= "Count", + strDenominatorMethod= "Sum", + strDenominatorCol= "timeonstudy" +) + +dfTransformed <- Transform_Rate(dfInput) +dfAnalyzed <- Analyze_NormalApprox(dfTransformed, strType = "rate") +dfFlagged <- Flag_NormalApprox(dfAnalyzed, vThreshold = c(-3,-2,2,3)) +dfSummarized <- Summarize(dfFlagged) + +table(dfSummarized$Flag) + +#### Example 1.2 - Make an SAE Metric by adding a filter. Also works with pipes. 
+ +SAE_KRI <- Input_Rate( + dfSubjects= gsm.core::lSource$Raw_SUBJ, + dfNumerator= gsm.core::lSource$Raw_AE %>% filter(aeser=="Y"), + dfDenominator = gsm.core::lSource$Raw_SUBJ, + strSubjectCol = "subjid", + strGroupCol = "siteid", + strNumeratorMethod= "Count", + strDenominatorMethod= "Sum", + strDenominatorCol= "timeonstudy" +) %>% + Transform_Rate %>% + Analyze_NormalApprox(strType = "rate") %>% + Flag_NormalApprox(vThreshold = c(-3,-2,2,3)) %>% + Summarize + +table(SAE_KRI$Flag) + +### Example 1.3 - Visualize Metric distribution using Bar Charts using provided htmlwidgets +library(gsm.kri) + +labels <- list( + Metric= "Serious Adverse Event Rate", + Numerator= "Serious Adverse Events", + Denominator= "Days on Study" +) + +gsm.kri::Widget_BarChart(dfResults = SAE_KRI, lMetric=labels, strOutcome="Metric") +gsm.kri::Widget_BarChart(dfResults = SAE_KRI, lMetric=labels, strOutcome="Score") +gsm.kri::Widget_BarChart(dfResults = SAE_KRI, lMetric=labels, strOutcome="Numerator") + +### Example 1.4 - Create Scatter plot with confidence bounds +dfBounds <- Analyze_NormalApprox_PredictBounds(SAE_KRI, vThreshold = c(-3,-2,2,3)) +gsm.kri::Widget_ScatterPlot(SAE_KRI, lMetric = labels, dfBounds = dfBounds) diff --git a/inst/examples/2_AdverseEventWorkflow.R b/inst/examples/2_AdverseEventWorkflow.R new file mode 100644 index 0000000..8ba5bc9 --- /dev/null +++ b/inst/examples/2_AdverseEventWorkflow.R @@ -0,0 +1,128 @@ +library(gsm.mapping) +library(gsm.kri) + +#### Example 2.1 - Configurable Adverse Event Workflow + +# Define YAML workflow +AE_workflow <- read_yaml(text= +'meta: + Type: Analysis + ID: kri0001 + GroupLevel: Site + Abbreviation: AE + Metric: Adverse Event Rate + Numerator: Adverse Events + Denominator: Days on Study + Model: Normal Approximation + Score: Adjusted Z-Score + AnalysisType: rate + Threshold: -2,-1,2,3 + nMinDenominator: 30 +spec: + Mapped_AE: + subjid: + type: character + Mapped_SUBJ: + subjid: + type: character + invid: + type: character + 
timeonstudy: + type: integer +steps: + - output: vThreshold + name: ParseThreshold + params: + strThreshold: Threshold + - output: Analysis_Input + name: Input_Rate + params: + dfSubjects: Mapped_SUBJ + dfNumerator: Mapped_AE + dfDenominator: Mapped_SUBJ + strSubjectCol: subjid + strGroupCol: invid + strGroupLevel: GroupLevel + strNumeratorMethod: Count + strDenominatorMethod: Sum + strDenominatorCol: timeonstudy + - output: Analysis_Transformed + name: Transform_Rate + params: + dfInput: Analysis_Input + - output: Analysis_Analyzed + name: Analyze_NormalApprox + params: + dfTransformed: Analysis_Transformed + strType: AnalysisType + - output: Analysis_Flagged + name: Flag_NormalApprox + params: + dfAnalyzed: Analysis_Analyzed + vThreshold: vThreshold + - output: Analysis_Summary + name: Summarize + params: + dfFlagged: Analysis_Flagged + nMinDenominator: nMinDenominator + - output: lAnalysis + name: list + params: + ID: ID + Analysis_Input: Analysis_Input + Analysis_Transformed: Analysis_Transformed + Analysis_Analyzed: Analysis_Analyzed + Analysis_Flagged: Analysis_Flagged + Analysis_Summary: Analysis_Summary +') + +# Run the workflow +lMappingWorkflows <- MakeWorkflowList( + c("AE", "SUBJ"), + strPath = here::here("tests/testthat/testdata/mappings"), + bExact = TRUE +) +mappings_spec <- gsm.mapping::CombineSpecs(lMappingWorkflows) +lRawData <- gsm.mapping::Ingest(gsm.core::lSource, mappings_spec) +AE_data <-list( + Mapped_SUBJ= lRawData$Raw_SUBJ, + Mapped_AE= lRawData$Raw_AE +) +AE_KRI <- RunWorkflow(lWorkflow = AE_workflow, lData = AE_data) + +# Create Barchart from workflow +Widget_BarChart(dfResults = AE_KRI$Analysis_Summary) + +#### Example 2.2 - Run Country-Level Metric +AE_country_workflow <- AE_workflow +AE_country_workflow$meta$GroupLevel <- "Country" +AE_country_workflow$steps[[2]]$params$strGroupCol <- "country" + +AE_country_KRI <- RunWorkflow(lWorkflow = AE_country_workflow, lData = AE_data) +gsm.kri::Widget_BarChart(dfResults = 
AE_country_KRI$Analysis_Summary, lMetric = AE_country_workflow$meta) + +#### Example 2.3 - Create SAE workflow + +# Tweak AE workflow metadata +SAE_workflow <- AE_workflow +SAE_workflow$meta$File <- "SAE_KRI" +SAE_workflow$meta$Metric <- "Serious Adverse Event Rate" +SAE_workflow$meta$Numerator <- "Serious Adverse Events" + +# Add a step to filter out non-serious AEs `RunQuery` +filterStep <- list(list( + name = "RunQuery", + output = "Mapped_AE", + params= list( + df= "Mapped_AE", + strQuery = "SELECT * FROM df WHERE aeser = 'Y'" + )) +) +SAE_workflow$steps <- SAE_workflow$steps %>% append(filterStep, after=0) + +# Run the updated workflow +SAE_KRI <- RunWorkflow(lWorkflow = SAE_workflow, lData = AE_data ) +gsm.kri::Widget_BarChart(dfResults = SAE_KRI$Analysis_Summary, lMetric = SAE_workflow$meta) + + + diff --git a/inst/examples/3_ReportingWorkflow.R b/inst/examples/3_ReportingWorkflow.R new file mode 100644 index 0000000..71a70d6 --- /dev/null +++ b/inst/examples/3_ReportingWorkflow.R @@ -0,0 +1,97 @@ +library(gsm.core) +library(gsm.mapping) +library(gsm.kri) +library(gsm.reporting) +library(dplyr) + +#### 3.1 - Create a KRI Report using 12 standard metrics in a step-by-step workflow + +core_mappings <- c("AE", "COUNTRY", "DATACHG", "DATAENT", "ENROLL", "LB", + "PD", "QUERY", "STUDY", "STUDCOMP", "SDRGCOMP", "SITE", "SUBJ") + +# Step 0 - Create Raw Data from Source Data +lRaw <- list( + Raw_SUBJ = gsm.core::lSource$Raw_SUBJ, + Raw_AE = gsm.core::lSource$Raw_AE, + Raw_PD = gsm.core::lSource$Raw_PD %>% + rename(subjid = subjectenrollmentnumber), + Raw_LB = gsm.core::lSource$Raw_LB, + Raw_STUDCOMP = gsm.core::lSource$Raw_STUDCOMP %>% + select(subjid, compyn), + Raw_SDRGCOMP = gsm.core::lSource$Raw_SDRGCOMP, + Raw_DATACHG = gsm.core::lSource$Raw_DATACHG %>% + rename(subject_nsv = subjectname), + Raw_DATAENT = gsm.core::lSource$Raw_DATAENT %>% + rename(subject_nsv = subjectname), + Raw_QUERY = gsm.core::lSource$Raw_QUERY %>% + rename(subject_nsv = subjectname), + 
Raw_ENROLL = gsm.core::lSource$Raw_ENROLL, + Raw_SITE = gsm.core::lSource$Raw_SITE %>% + rename(studyid = protocol) %>% + rename(invid = pi_number) %>% + rename(InvestigatorFirstName = pi_first_name) %>% + rename(InvestigatorLastName = pi_last_name) %>% + rename(City = city) %>% + rename(State = state) %>% + rename(Country = country) %>% + rename(Status = site_status), + Raw_STUDY = gsm.core::lSource$Raw_STUDY %>% + rename(studyid = protocol_number) %>% + rename(Status = status) +) + +# Step 1 - Create Mapped Data Layer - filter, aggregate and join raw data to create mapped data layer +mappings_wf <- gsm.core::MakeWorkflowList(strNames = core_mappings, strPath = "workflow/1_mappings", strPackage = "gsm.mapping") +mapped <- gsm.core::RunWorkflows(mappings_wf, lRaw) + +# Step 2 - Create Metrics - calculate metrics using mapped data +metrics_wf <- gsm.core::MakeWorkflowList(strPath = "workflow/2_metrics", strPackage = "gsm.kri") +analyzed <- gsm.core::RunWorkflows(metrics_wf, mapped) + +# Step 3 - Create Reporting Layer - create reports using metrics data +reporting_wf <- gsm.core::MakeWorkflowList(strPath = "workflow/3_reporting", strPackage = "gsm.reporting") +reporting <- gsm.core::RunWorkflows(reporting_wf, c(mapped, list(lAnalyzed = analyzed, + lWorkflows = metrics_wf))) + +# Step 4 - Create KRI Reports - create KRI report using reporting data +module_wf <- gsm.core::MakeWorkflowList(strPath = "workflow/4_modules", strPackage = "gsm.kri") +lReports <- gsm.core::RunWorkflows(module_wf, reporting) + +#### 3.2 - Automate data ingestion using Ingest() and CombineSpecs() +# Step 0 - Data Ingestion - standardize tables/columns names +mappings_wf <- gsm.core::MakeWorkflowList(strNames = core_mappings, strPath = "workflow/1_mappings", strPackage = "gsm.mapping") +mappings_spec <- gsm.mapping::CombineSpecs(mappings_wf) +lRaw <- gsm.mapping::Ingest(gsm.core::lSource, mappings_spec) + +# Step 1 - Create Mapped Data Layer - filter, aggregate and join raw data to create 
mapped data layer +mapped <- gsm.core::RunWorkflows(mappings_wf, lRaw) + +# Step 2 - Create Metrics - calculate metrics using mapped data +metrics_wf <- gsm.core::MakeWorkflowList(strPath = "workflow/2_metrics", strPackage = "gsm.kri") +analyzed <- gsm.core::RunWorkflows(metrics_wf, mapped) + +# Step 3 - Create Reporting Layer - create reports using metrics data +reporting_wf <- gsm.core::MakeWorkflowList(strPath = "workflow/3_reporting", strPackage = "gsm.reporting") +reporting <- gsm.core::RunWorkflows(reporting_wf, c(mapped, list(lAnalyzed = analyzed, + lWorkflows = metrics_wf))) + +# Step 4 - Create KRI Report - create KRI report using reporting data +module_wf <- gsm.core::MakeWorkflowList(strPath = "workflow/4_modules", strPackage = "gsm.kri") +lReports <- gsm.core::RunWorkflows(module_wf, reporting) + + +#### 3.3 Site-Level KRI Report with multiple SnapshotDate +# Below relies on the clindata stuff, do we need to rerun/rewrite reporting datasets? +lCharts <- gsm.kri::MakeCharts( + dfResults = gsm.core::reportingResults, + dfGroups = gsm.core::reportingGroups, + dfMetrics = gsm.core::reportingMetrics, + dfBounds = gsm.core::reportingBounds +) + +kri_report_path <- gsm.kri::Report_KRI( + lCharts = lCharts, + dfResults = gsm.kri::FilterByLatestSnapshotDate(reportingResults), + dfGroups = gsm.core::reportingGroups, + dfMetrics = gsm.core::reportingMetrics +) diff --git a/inst/examples/4_WorkflowIO.R b/inst/examples/4_WorkflowIO.R new file mode 100644 index 0000000..18d347d --- /dev/null +++ b/inst/examples/4_WorkflowIO.R @@ -0,0 +1,92 @@ +devtools::load_all() + +LoadData <- function(lWorkflow, lConfig, lData = NULL) { + lData <- lData + purrr::imap( + lWorkflow$spec, + ~ { + input <- lConfig$Domains[[ .y ]] + + if (is.data.frame(input)) { + data <- input + } else if (is.function(input)) { + data <- input() + } else if (is.character(input)) { + data <- read.csv(input) + } else { + cli::cli_abort("Invalid data source: {input}.") + } + + lData[[ .y ]] <<- 
(ApplySpec(data, .x)) + } + ) + return(lData) +} + +SaveData <- function(lWorkflow, lConfig) { + domain <- paste0(lWorkflow$meta$Type, '_', lWorkflow$meta$ID) + cli::cli_alert_info(domain) + + if (exists(domain, lConfig$Domains)) { + output <- lConfig$Domains[[ domain ]] + cli::cli_alert_info(output) + + cli::cli_alert_info( + 'Saving output of `lWorkflow` to `{output}`.' + ) + + write.csv( + lWorkflow$lResult, + output + ) + } else { + cli::cli_alert_info( + '{domain} not found.' + ) + } +} + +lConfig <- list( + LoadData = LoadData, + SaveData = SaveData, + Domains = c( + Raw_STUDY = function() { gsm.core::lSource$Raw_STUDY }, + Raw_SITE = function() { gsm.core::lSource$Raw_SITE }, + Raw_PD = function() { gsm.core::lSource$Raw_PD }, + + Raw_SUBJ = function() { gsm.core::lSource$Raw_SUBJ }, + Raw_ENROLL = function() { gsm.core::lSource$Raw_ENROLL }, + Raw_SDRGCOMP = function() { gsm.core::lSource$Raw_SDRGCOMP }, + Raw_STUDCOMP = function() { gsm.core::lSource$Raw_STUDCOMP }, + Raw_LB = function() { gsm.core::lSource$Raw_LB }, + Raw_AE = function() { gsm.core::lSource$Raw_AE }, + + Raw_DATAENT = function() { gsm.core::lSource$Raw_DATAENT }, + Raw_DATACHG = function() { gsm.core::lSource$Raw_DATACHG }, + Raw_QUERY = function() { gsm.core::lSource$Raw_QUERY }, + + Mapped_STUDY = file.path(tempdir(), 'mapped-study.csv'), + Mapped_SITE = file.path(tempdir(), 'mapped-site.csv'), + Mapped_COUNTRY = file.path(tempdir(), 'mapped-country.csv'), + Mapped_PD = file.path(tempdir(), 'mapped-pd.csv'), + + Mapped_SUBJ = file.path(tempdir(), 'mapped-subj.csv'), + Mapped_ENROLL = file.path(tempdir(), 'mapped-enroll.csv'), + Mapped_SDRGCOMP = file.path(tempdir(), 'mapped-sdrgcomp.csv'), + Mapped_STUDCOMP = file.path(tempdir(), 'mapped-studcomp.csv'), + Mapped_LB = file.path(tempdir(), 'mapped-lb.csv'), + Mapped_AE = file.path(tempdir(), 'mapped-ae.csv'), + + Mapped_DATAENT = file.path(tempdir(), 'mapped-dataent.csv'), + Mapped_DATACHG = file.path(tempdir(), 'mapped-datachg.csv'), + 
Mapped_QUERY = file.path(tempdir(), 'mapped-query.csv') + ) +) + +core_mappings <- c("AE", "COUNTRY", "DATACHG", "DATAENT", "ENROLL", "LB", + "PD", "QUERY", "STUDY", "STUDCOMP", "SDRGCOMP", "SITE", "SUBJ") + +lMappedData <- RunWorkflows( + MakeWorkflowList(strNames = core_mappings, strPath = 'workflow/1_mappings', strPackage = "gsm.mapping"), + lConfig = lConfig +) diff --git a/inst/examples/_setup.R b/inst/examples/_setup.R new file mode 100644 index 0000000..f3a2ae6 --- /dev/null +++ b/inst/examples/_setup.R @@ -0,0 +1,2 @@ +devtools::install_github('gilead-biostats/gsm.core@dev') +# or gsm.core git checkout branch and devtools::load_all() diff --git a/vignettes/.gitignore b/vignettes/.gitignore index 097b241..9618e1a 100644 --- a/vignettes/.gitignore +++ b/vignettes/.gitignore @@ -1,2 +1,3 @@ *.html *.R +qualification.log diff --git a/vignettes/Cookbook.Rmd b/vignettes/Cookbook.Rmd index 58d560f..64b3ae5 100644 --- a/vignettes/Cookbook.Rmd +++ b/vignettes/Cookbook.Rmd @@ -1,5 +1,6 @@ --- title: "Cookbook" +description: "Sample code showing how to use the Good Statistical Monitoring {gsm} suite of packages using sample data from {gsm.core}." output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Cookbook} @@ -8,7 +9,7 @@ vignette: > --- ```{r setup, include = FALSE} -library(gsm) +library(gsm.core) knitr::opts_chunk$set( collapse = TRUE, comment = "#>" @@ -17,7 +18,7 @@ knitr::opts_chunk$set( # Introduction -This vignette contains sample code showing how to use the Good Statistical Monitoring `{gsm}` package using sample data from [`{clindata}`](https://github.com/Gilead-BioStats/clindata). For more information on the `{gsm}` package see the [package homepage](https://gilead-biostats.github.io/gsm/). +This vignette contains sample code showing how to use the Good Statistical Monitoring `{gsm}` suite of packages using sample data from `{gsm.core}`. 
For more information on the `{gsm}` suite of packages see the [package homepage](https://gilead-biostats.github.io/gsm.core/). # Setup and Installation @@ -27,48 +28,56 @@ Run the following: ## Install devtools install.packages('devtools') -## Install and load sample raw data -devtools::install_github("Gilead-BioStats/clindata", ref = "main") -library(clindata) - ## Install and load gsm -devtools::install_github("Gilead-BioStats/gsm", ref = "main") -library(gsm) +devtools::install_github("Gilead-BioStats/gsm.core", ref = "main") +library(gsm.core) + +## Install and load gsm.mapping +devtools::install_github("Gilead-BioStats/gsm.mapping", ref = "main") +library(gsm.mapping) + +## Install and load gsm.kri +devtools::install_github("Gilead-BioStats/gsm.kri", ref = "main") +library(gsm.kri) + +## Install and load gsm.reporting +devtools::install_github("Gilead-BioStats/gsm.reporting", ref = "main") +library(gsm.reporting) ``` # Example 1 - Adverse Events Metric - Scripted -This example uses the standard {gsm} analysis workflows to creates site-level Adverse Event scripts. See the [Data Analysis Vignette](https://gilead-biostats.github.io/gsm/articles/DataAnalysis.html) for more detail. +This example uses the standard {gsm} analysis workflows to create site-level Adverse Event scripts. See the [Data Analysis Vignette](https://gilead-biostats.github.io/gsm.core/articles/DataAnalysis.html) for more detail. - **Example 1.1** calculates the Site-level AE rates. - **Example 1.2** adds a filter to include only Serious Adverse Events (SAEs) and implements pipes to run through the workflow. -- **Example 1.3** generates bar charts showing SAE rates and z-scores by study. -- **Example 1.4** generates a scatter plot with confidence bound for SAE rates. +- **Example 1.3** generates bar charts showing SAE rates and z-scores by study using `{gsm.kri}`. +- **Example 1.4** generates a scatter plot with confidence bound for SAE rates using `{gsm.kri}`. 
-```{r file = system.file("examples", "1_AdverseEventKRI.R", package = "gsm"), eval = FALSE, include = TRUE} +```{r file = system.file("examples", "1_AdverseEventKRI.R", package = "gsm.core"), eval = FALSE, include = TRUE} ``` # Example 2 - Adverse Events Metrics - Workflow -This examples introduces YAML workflows to re-generate the same results as in **Example 1** via a reusable pipeline. See the [Data Model Vignette](https://gilead-biostats.github.io/gsm/articles/DataModel.html) for more detail. +This example introduces YAML workflows to re-generate the same results as in **Example 1** via a reusable pipeline. See the [Data Model Vignette](https://gilead-biostats.github.io/gsm.core/articles/DataModel.html) for more detail. - **Example 2.1** runs the AE KRI workflow. - **Example 2.2** updates the metadata to run country-level metrics. - **Example 2.3** adds a filtering step to the workflow to generate the SAE metric. -```{r file = system.file("examples", "2_AdverseEventWorkflow.R", package = "gsm"), eval = FALSE, include = TRUE} +```{r file = system.file("examples", "2_AdverseEventWorkflow.R", package = "gsm.core"), eval = FALSE, include = TRUE} ``` # Example 3 - Study-Level Reporting Workflows -This example extends the previous examples to generate charts and reports for multiple KRIs. See the [Data Reporting Vignette](https://gilead-biostats.github.io/gsm/articles/DataReporting.html) for more detail. +This example extends the previous examples to generate charts and reports for multiple KRIs. See the [Data Reporting Vignette](https://gilead-biostats.github.io/gsm.reporting/articles/DataReporting.html) for more detail. - **Example 3.1** steps through several workflows to generate a report for all 12 standard site-level KRIs. -- **Example 3.2** automates data ingestion using `Ingest()` and `CombineSpecs()`. -- **Example 3.3** generates a report incorporating multiple timepoints using the sample `reporting` data saved as part of {gsm}. 
+- **Example 3.2** automates data ingestion using `gsm.mapping::Ingest()` and `gsm.mapping::CombineSpecs()`. +- **Example 3.3** generates a report using `{gsm.kri}` incorporating multiple timepoints using the sample `reporting` data saved as part of `{gsm.core}`. -```{r file = system.file("examples", "3_ReportingWorkflow.R", package = "gsm"), eval = FALSE, include = TRUE} +```{r file = system.file("examples", "3_ReportingWorkflow.R", package = "gsm.core"), eval = FALSE, include = TRUE} ``` diff --git a/vignettes/DataAnalysis.Rmd b/vignettes/DataAnalysis.Rmd new file mode 100644 index 0000000..eed5d99 --- /dev/null +++ b/vignettes/DataAnalysis.Rmd @@ -0,0 +1,223 @@ +--- +title: "Step-by-Step Analysis Workflow" +description: "This vignette walks users through the mechanics of the functions that produce all of the Analysis workflow output within the `{gsm.core}` package." +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Step-by-Step Analysis Workflow} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) + +library(gsm.core) +library(gt) +library(DT) +``` + +# Introduction + +This vignette walks users through the mechanics of the functions that produce all of the Analysis workflow output within the `{gsm.core}` package. The suite of `{gsm}` packages leverages Key Risk Indicators (KRIs) and thresholds to conduct study-level and site-level Risk Based Monitoring for clinical trials. + +These functions provide data frames, visualizations, and metadata to be used in reporting and error checking at clinical sites. The image below illustrates the supporting functions that feed into the yaml workflow that is specified in each analysis workflow. 
+ +![](data_analysis.png){width="100%"} + +All of these functions will run automatically and sequentially when a user calls upon the `RunWorkflow()` function with a specified yaml file for KRI metrics found in the `workflow/2_metrics` directory of the [`{gsm.kri}`](https://github.com/Gilead-BioStats/gsm.kri) package. + +Each of these individual functions can also be run independently outside of a specified yaml workflow. + +For the purposes of this documentation, we will evaluate the input(s) and output(s) of each individual function for a specific KRI to show the stepwise progression of how a yaml workflow is set up to handle and process data. + +------------------------------------------------------------------------ + +## Case Study - Step-by-Step Adverse Event KRI + +We will use sample clinical data simulated with the [`{gsm.datasim}`](https://github.com/Gilead-BioStats/gsm.datasim) package to run the Adverse Events (AE) Assessment, i.e., `AE_Assess()`, using the normal approximation method. + +Additional statistical methods and supporting functions are explored in [Appendix 1](#appendix-1). + +### 1. Create `dfInput` + +Start by creating `dfInput` using sample rawplus data created with `{gsm.datasim}`. Note that `Input_Rate()` requires three specific clinical datasets, which include a subject-level demographics/exposure dataset (`dfSubjects`) and a domain-level dataset (`dfNumerator`) that records every adverse event per subject. + +Since `Input_Rate()` is a generalized function, it is also required that you specify the relevant column names for the Subject (`strSubjectCol`), Group (`strGroupCol`) and optionally the Denominator (`strDenominatorCol`) and Numerator (`strNumeratorCol`) when it is not simply "Denominator" or "Numerator", respectively. + +Finally, the method for calculating the Numerator and Denominator is specified in `strNumeratorMethod` and `strDenominatorMethod` as either "Count" or "Sum". 
If the method is "Count", the function simply counts the number of rows in the provided data frame. If the method is "Sum", the function takes the sum of the values in the specified column (`strNumeratorCol` or `strDenominatorCol`). + + +```{r include = TRUE} +dfInput <- Input_Rate( + dfSubjects = gsm.core::lSource$Raw_SUBJ, + dfNumerator = gsm.core::lSource$Raw_AE, + dfDenominator = gsm.core::lSource$Raw_SUBJ, + strSubjectCol = "subjid", + strGroupCol = "invid", + strNumeratorMethod = "Count", + strDenominatorMethod = "Sum", + strDenominatorCol = "timeonstudy" +) +``` + +The data frame `dfInput` for an AE assessment will be created by running `Input_Rate()` and will have one record per subject, with the following columns: + +- `SubjectID`: Subject Identifier +- `GroupID`: Group Identifier +- `GroupLevel`: Type of Group specified in `GroupID` (Country, Site) +- `Numerator`: Total Number of Event(s) of Interest (in this example, the number of AEs reported; per subject) +- `Denominator`: Total Time on Treatment (measured in days; per subject) +- `Metric`: Rate of Event Incidence (calculated as `Numerator`/`Denominator`; per subject) + +```{r echo = FALSE} +datatable(dfInput) %>% + formatRound(columns = "Metric", digits = 3) +``` + +------------------------------------------------------------------------ + +### 2. Create `dfTransformed` + +The data frame `dfTransformed` is derived from `dfInput` using a `Transform()` function. In our example, the analysis pipeline pulls in `Transform_Rate()` since the default metric for AEs is the number of AEs reported over the course of treatment per site, i.e., a rate. + +```{r include = TRUE} +dfTransformed <- Transform_Rate(dfInput) +``` + +The resulting `dfTransformed` data frame will contain site-level transformed data, including KRI calculation. 
Using our example AE data, `dfTransformed` contains the following columns: + +- `GroupID`: Group Identifier (default is Site ID) +- `GroupLevel`: Type of Group specified in `GroupID` (Country, Site) +- `Numerator`: Cumulative Number of Event(s) of Interest (in this example, number of AEs reported across subjects) +- `Denominator`: Cumulative Time on Treatment (in days, across subjects) +- `Metric`: Rate of Event(s) of Interest (in this example, number of AEs reported over the course of treatment in days) + +```{r, echo = FALSE} +datatable(dfTransformed) %>% +formatRound(columns = "Metric", digits = 3) +``` + +------------------------------------------------------------------------ + +### 3. Create `dfAnalyzed` + +The data frame `dfAnalyzed` is derived from `dfTransformed` using an `Analyze()` function, which incorporates a specific statistical model. The resulting `dfAnalyzed` data frame will contain site-level analysis results data. The normal approximation method is the default statistical model for AE data, so the analysis pipeline automatically runs `Analyze_NormalApprox()`. + +```{r include = TRUE} +dfAnalyzed <- Analyze_NormalApprox(dfTransformed) +``` + +Using our example AE data, `dfAnalyzed` contains the following columns: + +- `GroupID`: Group Identifier (default is Site ID) +- `GroupLevel`: Type of Group specified in `GroupID` (Country, Site) +- `Numerator`: Cumulative Number of Event(s) of Interest (in this example, number of AEs reported across subjects); Carried from `dfTransformed`. +- `Denominator`: Cumulative Time on Treatment (in days, across subjects); Carried from `dfTransformed`. +- `Metric`: Rate of Event(s) of Interest (in this example, number of AEs reported over the course of treatment in days); Carried from `dfTransformed`. +- `OverallMetric`: Aggregate metric for the group that is being assessed. ( sum(Numerator) / sum(Denominator) ). 
+- `Factor`: Calculated over-dispersion adjustment factor (mean of the z-score sum of squares calculated in the analysis functions). +- `Score`: Calculated Residual (per site). + +```{r, echo = FALSE} +datatable(dfAnalyzed) %>% + formatRound(columns = c("Metric", "OverallMetric", "Factor", "Score"), digits = 3) +``` + +------------------------------------------------------------------------ + +### 4. Create `dfFlagged` + +The data frame `dfFlagged` is derived from `dfAnalyzed` using the `Flag()` function. The resulting `dfFlagged` data frame will contain site-level analysis results data with flagging incorporated based on a pre-specified statistical threshold to highlight possible outliers. + +```{r include = TRUE} +dfFlagged <- Flag(dfAnalyzed, vThreshold = c(-3, -2, 2, 3)) +``` + +The default flagging function for the normal approximation method is `Flag()` and the default threshold is (-3, -2, 2, 3). Using our example AE data, `dfFlagged` contains the following columns: + +- `GroupID`: Group Identifier (default is Site ID) +- `GroupLevel`: Type of Group specified in `GroupID` (Country, Site) +- `Numerator`: Cumulative Number of Event(s) of Interest (in this example, number of AEs reported across subjects); Carried from `dfAnalyzed` +- `Denominator`: Cumulative Time on Treatment (in days, across subjects); Carried from `dfAnalyzed` +- `Metric`: Rate of Event(s) of Interest (in this example, number of AEs reported over the course of treatment in days); Carried from `dfAnalyzed` +- `OverallMetric`: Aggregate metric for the group that is being assessed. ( sum(Numerator) / sum(Denominator) ). +- `Factor`: Calculated over-dispersion adjustment factor (mean of the z-score sum of squares calculated in the analysis functions); Carried from `dfAnalyzed`. 
+- `Score`: Calculated Residual (per site); Carried from `dfAnalyzed` +- `Flag`: Flag Indicating Possible Statistical Outliers; Valid values for this variable include -2, -1, 0, 1, and 2, which determine the "extremeness" of the outlier. -2 and 2 represent more extreme outliers, -1 and 1 represent less extreme outliers, and 0 represents a non-outlier. + +```{r, echo = FALSE} +datatable(dfFlagged) +``` + +------------------------------------------------------------------------ + +### 5. Create `dfSummary` + +The data frame `dfSummary` is derived from `dfFlagged` using the `Summarize()` function. The resulting `dfSummary` data frame will contain the most relevant columns from `dfFlagged` with data sorted in a meaningful way to provide a concise overview of the assessment. Flagged sites will sort earlier than non-flagged sites, with the more "extreme" outliers displayed first. The columns in `dfSummary` include: + +- `GroupID`: Group Identifier (default is Site ID) +- `GroupLevel`: Type of Group specified in `GroupID` (Country, Site) +- `Numerator`: Cumulative Number of Event(s) of Interest (in this example, number of AEs reported across subjects); Carried from `dfAnalyzed` +- `Denominator`: Cumulative Time on Treatment (in days, across subjects); Carried from `dfAnalyzed` +- `Metric`: Rate of Event(s) of Interest (in this example, number of AEs reported over the course of treatment in days) +- `Score`: Calculated Residual (per site) +- `Flag`: Flag Indicating Possible Statistical Outliers; Valid values for this variable include -2, -1, 0, 1, and 2, which determine the "extremeness" of the outlier. -2 and 2 represent more extreme outliers, -1 and 1 represent less extreme outliers, and 0 represents a non-outlier. 
+ +```{r include = TRUE} +dfSummary <- Summarize(dfFlagged) +``` + +```{r, echo = FALSE} +datatable(dfSummary[-1,]) +``` + +------------------------------------------------------------------------ + +# Recap - Normal Approximation Adverse Event KRI + +- `dfInput` created as the original input using `Input_Rate()` +- `dfTransformed` created from `dfInput` using `Transform_Rate()` +- `dfAnalyzed` created from `dfTransformed` using `Analyze_NormalApprox()` +- `dfFlagged` created from `dfAnalyzed` using `Flag()` +- `dfSummary` created from `dfFlagged` using `Summarize()` + +------------------------------------------------------------------------ + +# Appendix 1 - Supporting Functions {#appendix-1} + +The following sections include various examples of supporting functions and statistical models that can be employed in the Analysis workflow. Please note that this is **not** an exhaustive list, but includes some of the most commonly called upon functions. + +### Mapping Functions + +- `RunQuery()`: Run a SQL query to create new data.frames with filtering and column name specifications. +- `Input_Rate()`: Calculate a subject level rate from raw numerator and denominator data + +### Transform Functions + +- `Transform_Rate()`: Calculates cumulative rate of Event(s) of Interest per site +- `Transform_Count()`: Calculates cumulative number of Event(s) of Interest per site + +### Analyze Functions + +- `Analyze_NormalApprox()`: Uses funnel plot method with normal approximation to create analysis results for percentage/rate. 
+- `Analyze_Fisher()`: Uses Fisher's Exact Test to determine if there are non-random associations between a site and a given KRI +- `Analyze_Identity()`: Used in the data pipeline between `Transform()` and `Flag()` functions to rename KRI and Score columns +- `Analyze_Poisson()`: Uses a Poisson model to describe the distribution of events in the overall site population, i.e., determine how many times an event is likely to occur at a site over a specified treatment period + +### Flag Functions + +- `Flag()`: Default flagging function for all assessments +- `Flag_NormalApprox()`: Deprecated flagging function when `Analyze_NormalApprox()` is used for an assessment. +- `Flag_Poisson()`: Deprecated flagging function when `Analyze_Poisson()` is used for an assessment + + + +### What Statistical Models Are Available For Each Assessment? + +- By default, all yaml workflow assessments specified in the `inst/workflow/` directory of the `{gsm.kri}` package use the [normal approximation](https://gilead-biostats.github.io/gsm.core/articles/KRI%20Method.html#the-normal-approximation-method) method. +- Optionally, other statistical methods include: [**Poisson**](https://gilead-biostats.github.io/gsm.core/articles/KRI%20Method.html#the-poisson-regression-method), [**Fisher's Exact**](https://gilead-biostats.github.io/gsm.core/articles/KRI%20Method.html#the-fishers-exact-method), and [**Identity**](https://gilead-biostats.github.io/gsm.core/articles/KRI%20Method.html#the-identity-method). + +![](data_analysis_combined.png){width="100%"} diff --git a/vignettes/DataModel.Rmd b/vignettes/DataModel.Rmd index 58572be..233cd34 100644 --- a/vignettes/DataModel.Rmd +++ b/vignettes/DataModel.Rmd @@ -1,5 +1,6 @@ --- title: "Data Model" +description: "A vignette detailing the data model used in the gsm pipeline." 
output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Data Model} @@ -7,13 +8,369 @@ vignette: > %\VignetteEncoding{UTF-8} --- -```{r, include = FALSE} +```{r setup, include = FALSE} +library(gsm.core) +library(gt) +library(dplyr) + knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` -```{r setup} -library(openrbqm) +# Introduction + +The `{gsm}` suite of packages provides a standardized data pipeline for conducting study-level Risk Based Quality Management (RBQM) for clinical trials. There are four main types of data used in the `{gsm}` suite of packages: + +- **Raw Data** - Clinical and operational data from study databases +- **Mapped Data** - Data that has been transformed and standardized for analysis +- **Analysis Data** - Data that has been analyzed to calculate Key Risk Indicators (KRIs) +- **Reporting Data** - Data that has been summarized and formatted for reporting + +This vignette provides a high-level overview of how each type of data is used, and includes detailed data specifications as appendices. + +# Data Model Overview + +In general, the `{gsm}` suite of packages is designed to be flexible and customizable, allowing users to build custom data pipelines that support many types of raw study data. As shown below, raw clinical data is transformed into mapped data, which is then analyzed to calculate desired metrics. The analysis data is then combined and formatted for reporting with additional raw data, including CTMS data and `{gsm.mapping}` workflow data, which provides relevant metadata for reports. + +![](data_model_simple.png){width="100%"} + +# Raw and Mapped Data + +The `{gsm}` suite of packages is designed to work with a wide variety of clinical data sources. The raw data used in the analysis pipeline is typically sourced from clinical trial databases and is transformed into mapped data using simple transformations. Mapped data is then used as input for the analysis pipeline. 
+ +There is not a single data standard for raw or mapped data in `{gsm.mapping}`. The only requirement is that the mapped data is compatible with the analytics pipeline. Data Mapping transformations can be done using multiple methods including custom R scripts (e.g., with `dplyr`), SQL queries, or using `gsm.mapping` workflows (e.g. the `system.file("workflow/1_mappings/AE.yaml", package = "gsm.mapping")` file). Examples of these methods can be found in `vignette("Cookbook")`. + +# Analysis Data + +In `{gsm.kri}`, analysis data is used to capture key metrics associated with the conduct of a clinical trial. As described in the `{gsm.kri}` readme, 12 standard Key Risk Indicator (KRI) metrics are included in the package along with automated workflows that allow them to be run for all sites or countries in a study. Examples of KRIs include the rate of adverse events or amount of missing data at a site or across sites. Defining and deploying KRIs during a study allows study teams to continually monitor risks to the integrity of the trial and take corrective actions accordingly. + +![](data_analysis.png){width="100%"} + +The image above provides an overview of the default KRI analysis pipeline. The pipeline is a standardized five-step process for assessing data issues by going from participant-level input data to a standardized site-level summary of model results. The functions used in each step of the data pipeline along with the input and output datasets are described in more detail below. + +1. `dfInput`: Input data; Cross-domain participant-level input data with all needed data for KRI derivation. Created by the `Input_Rate()` function using `mapped` data as input. +2. `dfTransformed`: Transformed data; Site-level transformed data including KRI calculation. Created by `Transform_*()` functions using `dfInput` as input. +3. `dfAnalyzed`: Analyzed data; Site-level analysis results data. Created by `Analyze_*()` functions using `dfTransformed` as input. +4. 
`dfFlagged`: Flagged data; Site-level analysis results with flags added to indicate potential statistical outliers. Created by passing numeric thresholds to a `Flag_*()` function using `dfAnalyzed` as input. +5. `dfSummary`: Summary data; Standardized subset of the flagged data. This summary data has the same structure for all assessments and always includes both KRI and Flag values so that a user can easily look at trends for any given site across multiple assessments. Created using the `Summarize()` function using `dfFlagged` as input. + +The data requirements for each component of the analysis pipeline are rigid; See [Appendix 1](#Appendix-1-Data-Model) for full specifications. + +## Analysis Workflows +Since there are rigid data requirements for each component of the analysis data model, the analysis workflow is largely standardized. There are two main approaches to running the analysis workflow: + +1. **Scripted Analysis**: Run each step of the analysis pipeline individually using the functions provided in the `{gsm}` suite of packages. This approach is useful for understanding the data requirements and for debugging. See Example 1 in `vignette("Cookbook")` for an example of this approach. +2. **Workflow Analysis**: Run the analysis pipeline using a YAML workflow file. This approach is useful for running the same analysis on multiple studies or for automating the analysis process. See Example 2 in `vignette("Cookbook")` for an example of this approach. + +Note that each step in these workflows can be customized based on the requirements for a specific KRI. The graphic below shows four such workflows. + +![](data_analysis_combined.png){width="100%"} + +More details about analysis data pipelines can be found in `vignette("DataAnalysis")`. 
+ +# Reporting Data + +A rigid Reporting Data framework is provided in `{gsm.reporting}` to allow for standardized reporting, visualization and meta-analysis that compare risk profiles across timepoints, and even across multiple studies. The Reporting Data sets used in `{gsm.reporting}` and `{gsm.kri}` are: + +1. `Reporting_Results`: Summary data; Standardized subset of the flagged data. This summary data has the same structure for all assessments and always includes both KRI and Flag values so that a user can easily look at trends for any given site across multiple assessments. Created using the `Summarize()` function in the analytics pipeline, followed by the `BindResults()` function to add columns necessary for reporting and stack metrics and snapshots into a single data.frame. +2. `Reporting_Bounds`: Bounded data; A data.frame containing predicted boundary values with upper and lower bounds across the range of observed values. Created with the `MakeBounds()` function. +3. `Reporting_Groups`: Grouped data; Long data.frame of summarized group CTMS data with site, study, and country level counts and metrics. Constructed by binding data.frames created with `MakeLongMeta()`. +4. `Reporting_Metrics`: Metric metadata; Metric-specific metadata for use in charts and reporting. Created by passing an `lWorkflow` object to the `MakeMetric()` function. + +Similar to Analysis Workflows, reporting data pipelines can be run as R scripts or as YAML workflows. Example 3 in the cookbook vignette shows how to populate the Reporting Data tables using output from the Analysis Workflows and other study data sources. The Reporting Deep Dive vignette (`vignette("DataReporting")`) provides more details on the Reporting Data model. 
+ +# Appendix 1 - Data Model + +# Overview +![](data_model_detailed.png){width="100%"} + +# Analytics data model + +The KRI analytics pipeline is a standardized process for **Analyzing** data issues by going from participant-level `input` data to a standardized site-level `summary` of model results. The data sets used in each step of the data pipeline are described in detail below. When using a metric workflow YAML to create these tables, all data tables are contained in a list, which we call `lAnalysis`. This list is then fed into the reporting data pipeline. + +## Analysis Data Tables + +### `Analysis_Input` + - Function(s) used to create table: + - `gsm.core::Input_Rate()` + - Inputs: + - `Analysis_Subjects` + - `Analysis_Numerator` + - `Analysis_Denominator` + - Usage: The base data.frame for all Analysis workflows. Feeds into the `Transform_*()` functions. + - Structure: + + | Table | Column Name | Description | Type | Optional | + |----------|--------------|--------------------------------------|----------|--| + | Analysis_Input | SubjectID | The subject ID | Character| | + | Analysis_Input | GroupID | The group ID for the metric | Character| | | + | Analysis_Input | GroupLevel | The group type for the metric (e.g. "Site") | Character| | + | Analysis_Input | Numerator | The calculated numerator value | Numeric | | + | Analysis_Input | Denominator | The calculated denominator value | Numeric | | + | Analysis_Input | Metric | The calculated rate/metric value | Numeric | | + + +### `Analysis_Transformed` + - Function(s) used to create table: + - `gsm.core::Transform_Rate()` + - `gsm.core::Transform_Count()` + - Inputs: `Analysis_Input` + - Usage: Convert from input data format to needed format to derive KRI for an Assessment via the `Analyze_*()` functions. 
+ - Structure: + + | Table | Column Name | Description | Type | Optional | + |----------|--------------|--------------------------------------|----------|--| + | Analysis_Transformed | GroupID | The group ID for the metric | Character| | | + | Analysis_Transformed | GroupLevel | The group type for the metric (e.g. "Site") | Character| | + | Analysis_Transformed | Numerator | The calculated numerator value | Numeric | | + | Analysis_Transformed | Denominator | The calculated denominator value | Numeric | | + | Analysis_Transformed | Metric | The calculated rate/metric value | Numeric | | + +### `Analysis_Analyzed` + - Function(s) used to create table: + - `gsm.core::Analyze_Fisher()` + - `gsm.core::Analyze_Identity()` + - `gsm.core::Analyze_NormalApprox()` + - `gsm.core::Analyze_Poisson()` + - Inputs: `Analysis_Transformed` + - Usage: Prepare the data for `Flag_*()` by performing the specified test on the metric provided. + - Structure: + + | Table | Column Name | Description | Type | Optional | + |----------|--------------|--------------------------------------|----------|--| + | Analysis_Analyzed | GroupID | The group ID for the metric | Character| | | + | Analysis_Analyzed | GroupLevel | The group type for the metric (e.g. "Site") | Character| | + | Analysis_Analyzed | Numerator | The calculated numerator value | Numeric | | + | Analysis_Analyzed | Denominator | The calculated denominator value | Numeric | | + | Analysis_Analyzed | Metric | The calculated rate/metric value | Numeric | | + | Analysis_Analyzed | Score | The Statistical Score | Numeric | | + | Analysis_Analyzed | Overall Metric | | Numeric |* | + | Analysis_Analyzed | Factor | | Numeric |* | + | Analysis_Analyzed | Predicted Count | | Numeric |* | + + +### `Analysis_Flagged` + - Function(s) used to create table: + - `gsm.core::Flag()` + - Inputs: `Analysis_Analyzed` + - Usage: Flag a group-level metric to be summarized via `gsm.core::Summarize()` and used for reporting. 
+ - Structure: + + | Table | Column Name | Description | Type | Optional | + |----------|--------------|--------------------------------------|----------|--| + | Analysis_Flagged | GroupID | The group ID for the metric | Character| | | + | Analysis_Flagged | GroupLevel | The group type for the metric (e.g. "Site") | Character| | + | Analysis_Flagged | Numerator | The calculated numerator value | Numeric | | + | Analysis_Flagged | Denominator | The calculated denominator value | Numeric | | + | Analysis_Flagged | Metric | The calculated rate/metric value | Numeric | | + | Analysis_Flagged | Score | The Statistical Score | Numeric | | + | Analysis_Flagged | Flag | The ordinal Flag to be applied | Numeric | | + | Analysis_Flagged | Overall Metric | | Numeric |* | + | Analysis_Flagged | Factor | | Numeric |* | + | Analysis_Flagged | Predicted Count | | Numeric |* | + +### `Analysis_Summary` + - Function(s) used to create table: + - `gsm.core::Summarize()` + - Inputs: `Analysis_Flagged` + - Usage: Summarize KRI at the group level for reporting. + - Structure: + + | Table | Column Name | Description | Type | Optional | + |----------|--------------|--------------------------------------|----------|--| + | Analysis_Summary | GroupID | The group ID for the metric | Character| | | + | Analysis_Summary | GroupLevel | The group type for the metric (e.g. "Site") | Character| | + | Analysis_Summary | Numerator | The calculated numerator value | Numeric | | + | Analysis_Summary | Denominator | The calculated denominator value | Numeric | | + | Analysis_Summary | Metric | The calculated rate/metric value | Numeric | | + + +# Overview of Reporting data model + +## Reporting Data Tables + +### `Reporting_Results` + - Function(s) used to create table: + - `gsm.reporting::BindResults()` + - Inputs: `lAnalysis`, `strStudyID`, `dSnapshotDate` + - Workflow used to create table: `3_reporting/Results.yaml` in `{gsm.reporting}` + - Usage: Summarize KRI at the group level for reporting. 
+ - Structure: + + | Table | Column Name | Description | Type | Optional | + |----------|--------------|--------------------------------------|----------|--|-----------| + | Reporting_Results | GroupID | The group ID for the metric | Character| | + | Reporting_Results | GroupLevel | The group type for the metric (e.g. "Site") | Character| | + | Reporting_Results | Numerator | The calculated numerator value | Numeric | | + | Reporting_Results | Denominator | The calculated denominator value | Numeric | | + | Reporting_Results | Metric | The calculated rate/metric value | Numeric | | + | Reporting_Results | Score | The calculated metric score | Numeric | | + | Reporting_Results | Flag | The calculated flag | Numeric | | + | Reporting_Results | MetricID | The Metric ID | Character| * | + | Reporting_Results | StudyID | The Study ID | Character| * | + | Reporting_Results | SnapshotDate | The Date of the snapshot | Date | * | + +### `Reporting_Bounds` + - Function(s) used to create table: + - `gsm.reporting::MakeBounds()` + - `gsm.core::Analyze_NormalApprox_PredictBounds()` (called within `gsm.reporting::MakeBounds()`) + - Inputs: `lAnalysis`, `strStudyID`, `dSnapshotDate` + - Workflow used to create table: `3_reporting/Bounds.yaml` in `{gsm.reporting}` + - Usage: Calculates predicted percentages/rates and upper- and lower-bounds across the full range of sample sizes/total exposure values for reporting. 
+ - Structure: + + | Table | Column Name | Description | Type | Optional | + |----------|--------------|--------------------------------------|----------|--|----------| + | Reporting_Bounds | Threshold | The number of standard deviations that the upper and lower bounds are based on | Numeric| | + | Reporting_Bounds | Denominator | The calculated denominator value | Numeric | | + | Reporting_Bounds | LogDenominator | The calculated log denominator value | Numeric | | + | Reporting_Bounds | Numerator | The calculated numerator value | Numeric | | + | Reporting_Bounds | Metric | The calculated rate/metric value | Numeric | | + | Reporting_Bounds | MetricID | The Metric ID | Character| | + | Reporting_Bounds | StudyID | The Study ID | Character| | + | Reporting_Bounds | SnapshotDate | The Date of the snapshot | Date | | + +### `Reporting_Groups` + - Function(s) used to create table: + - `gsm.reporting::MakeLongMeta()` + - `dplyr::bind_rows()` + - Inputs: CTMS site, study and country data + - Workflow used to create table: `3_reporting/Groups.yaml` in `{gsm.reporting}` + - Usage: Group-level metadata dictionary. + - Structure: Long data frame, with certain `Param` required for given `GroupLevel` + + +| Table | Column | Description |Type | Optional | +|-------------|-----------------------|-----------------------------------|----------|-----------| +| Reporting_Groups | GroupID | Unique Group ID | Character| | +| Reporting_Groups | GroupLevel | Group Level (e.g. Site, Country) | Character| | +| Reporting_Groups | Param | Parameter Name (e.g. "Status") | Character| | +| Reporting_Groups | Value | Parameter Value (e.g. "Active") | Character| | + +Expected `Param` by `GroupLevel` for use in gsm reporting. User may add other Param values as needed. 
+ +| GroupLevel | Param | Description |Value Type | +|--------------|----------------------|-----------------------------------|----------| +| Study | Status | Study Status | Character| +| Study | ParticipantCount | # of Enrolled Participants | Numeric | +| Study | SiteCount | # of Activated Sites | Numeric| +| Site | ParticipantCount | # of Enrolled Participants | Numeric | +| Site | Status | Site Status | Character | +| Site | InvestigatorFirstName | Investigator First name | Character | +| Site | InvestigatorLastName | Investigator Last name | Character | +| Site | City | City | Character| +| Site | State | State | Character | +| Site | Country | Country | Character | +| Country | EnrolledParticipants | # of Enrolled Participants | Numeric | + + +### `Reporting_Metrics` + - Function used to create table: `gsm.reporting::MakeMetric()` + - Inputs: `lWorkflows` - metadata for the corresponding kri(s) made with `gsm.core::MakeWorkflowList()` + - Workflow used to create table: `3_reporting/Metrics.yaml` in `{gsm.reporting}` + - Usage: Metadata used for charts and tables + - Structure: + + | Table | Column Name | Description | Type | Optional | + |----------|--------------|--------------------------------------|----------|--| -------------- | + | Reporting_Metrics| File | The YAML file for workflow | Character| | + | Reporting_Metrics| MetricID | ID for the Metric | Character| | + | Reporting_Metrics| Group | The group type for the metric (e.g. 
"Site") | Character| | + | Reporting_Metrics| Abbreviation | Abbreviation for the metric | Character| | + | Reporting_Metrics| Metric | Name of the metric | Character| | + | Reporting_Metrics| Numerator | Data source for the Numerator | Character| | + | Reporting_Metrics| Denominator | Data source for the Denominator | Character| | + | Reporting_Metrics| Model | Model used to calculate metric | Character| | + | Reporting_Metrics| Score | Type of Score reported | Character| | + +## Appendix 2 - Analysis Workflow Specifications + +Assessment workflow metadata objects are passed to the `lWorkflow` parameter in `RunWorkflow()` to define functions and parameters across multiple studies. + +The `lWorkflow` object is a named list of metadata and steps defining how each assessment should be run. By default, `gsm.core::MakeWorkflowList()` imports YAML specifications from `workflow/2_metrics` in `{gsm.kri}`. Each item in `lWorkflow` expects the following parameters in the `steps` section: + +- `workflow`: Array defining one or more functions to be executed as part of the workflow for a given assessment + - `workflow[]$meta`: specifies all of the metadata information for the KRI. + - `workflow[]$steps`: specifies all of the steps in the workflow. + - `workflow[]$steps$name`: name of the `{gsm}` function. 
+ - `workflow[]$steps$inputs`: specifies the required input data + - `workflow[]$steps$output`: specifies the output data from the workflow step, which can be used as an input in the next step in the workflow + - `workflow[]$steps$params`: specifies parameters to be passed to the function + +For example, the default workflow for the AE assessment (`system.file("workflow/2_metrics/kri0001.yaml", package = "gsm.kri")`) is shown below: + +```{yaml eval = FALSE} +meta: + Type: Analysis + ID: kri0001 + GroupLevel: Site + Abbreviation: AE + Metric: Adverse Event Rate + Numerator: Adverse Events + Denominator: Days on Study + Model: Normal Approximation + Score: Adjusted Z-Score + AnalysisType: rate + Threshold: -2,-1,2,3 + nMinDenominator: 30 +spec: + Mapped_AE: + subjid: + type: character + Mapped_SUBJ: + subjid: + type: character + invid: + type: character + timeonstudy: + type: integer +steps: + - output: vThreshold + name: gsm.core::ParseThreshold + params: + strThreshold: Threshold + - output: Analysis_Input + name: gsm.core::Input_Rate + params: + dfSubjects: Mapped_SUBJ + dfNumerator: Mapped_AE + dfDenominator: Mapped_SUBJ + strSubjectCol: subjid + strGroupCol: invid + strGroupLevel: GroupLevel + strNumeratorMethod: Count + strDenominatorMethod: Sum + strDenominatorCol: timeonstudy + - output: Analysis_Transformed + name: gsm.core::Transform_Rate + params: + dfInput: Analysis_Input + - output: Analysis_Analyzed + name: Analyze_NormalApprox + params: + dfTransformed: Analysis_Transformed + strType: AnalysisType + - output: Analysis_Flagged + name: gsm.core::Flag_NormalApprox + params: + dfAnalyzed: Analysis_Analyzed + vThreshold: vThreshold + - output: Analysis_Summary + name: gsm.core::Summarize + params: + dfFlagged: Analysis_Flagged + nMinDenominator: nMinDenominator + - output: lAnalysis + name: list + params: + ID: ID + Analysis_Input: Analysis_Input + Analysis_Transformed: Analysis_Transformed + Analysis_Analyzed: Analysis_Analyzed + Analysis_Flagged: 
Analysis_Flagged + Analysis_Summary: Analysis_Summary + + ``` diff --git a/vignettes/DataReporting.Rmd b/vignettes/DataReporting.Rmd new file mode 100644 index 0000000..5bbeffb --- /dev/null +++ b/vignettes/DataReporting.Rmd @@ -0,0 +1,367 @@ +--- +title: "Step-by-Step Reporting Workflow" +description: "This vignette walks users through the mechanics of the functions and workflows that produce the Reporting outputs in the {gsm} pipeline." +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Step-by-Step Reporting Workflow} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +library(gsm.core) +library(gsm.kri) +library(gsm.mapping) +library(gsm.reporting) +library(dplyr) +library(DT) +library(gt) +set.seed(1) + +dt <- function(data){ + data %>% + DT::datatable( + extensions = 'FixedColumns', + options = list( + scrollX = FALSE, + fixedColumns = TRUE + ), + rownames = FALSE + ) +} +``` + +# Introduction + +This vignette walks users through the mechanics of the functions and workflows that produce all of the Reporting output within the `{gsm.reporting}` package. The `{gsm}` suite of packages leverages Key Risk Indicators (KRIs) and thresholds to conduct study-level, country-level and site-level Risk Based Monitoring for clinical trials. + +These functions and workflows produce data frames, visualizations, metadata, and reports to be used in reporting and error checking at clinical sites. The image below illustrates the overarching context in which the reporting workflow runs, taking inputs from both the output of the analytics workflow, as well as raw study-, site-, and country-level data in the Raw/Raw+ format. 
+ +![](data_model_detailed.png){width="100%"} + +All of the functions to create the data frames in the reporting data model will run automatically and sequentially when a user specifies the metadata and data needed for the report, and calls upon the `gsm.core::RunWorkflow()` function on the yaml files in the `workflow/3_reporting` directory. To create a report, the output of the reporting yamls is fed into the yamls in the `workflow/4_modules` directory to produce an HTML document with all charts and tables created in the reporting workflow. For a more detailed discussion of the yaml file and directory structure, see the [`{gsm.core}` Extensions vignette](https://gilead-biostats.github.io/gsm.core/articles/gsmExtensions.html). + +Each of the individual functions can also be run independently outside of a specified yaml workflow. + +For the purposes of this documentation, we will evaluate the input(s) and output(s) of each individual function for a specific KRI to show the stepwise progression of how a yaml workflow is set up to handle and process reporting-level data. + +------------------------------------------------------------------------ + +## Case Study - Step-by-Step Full Site-Level Report + +We will use sample clinical data simulated from the [`{gsm.datasim}`](https://github.com/Gilead-BioStats/gsm.datasim) package to run the full site-level report for all 12 KRIs included in this package. The focus of this vignette is the reporting workflow, so the output of the analytics workflow will be briefly discussed, but only in the context of *inputs* to the reporting workflow. + +Additional supporting functions are explored in [Appendix 1](#appendix-1). + +### Step 0 - Run Analysis Workflow(s) + +Prior to running the reporting model to create reporting data frames, charts and reports, the metrics we are reporting on must be properly calculated and flagged with the analysis workflow. 
For more information on the Analysis Workflow, see the associated `vignette("DataAnalysis")`. + +To run the analysis workflow on all 13 KRIs using the sample `gsm.core::lSource` Raw+ data, use the code snippet below. From this, three pieces of output will be used in the reporting workflow: + +1. `lAnalysis` - list of data frames in the analysis data model +2. `metrics_wf` - list of metric workflows (`lWorkflow`) containing the metadata for each of the KRIs +3. `mapped$Mapped_SUBJ` - mapped data.frame of enrolled participants + +```{r include = TRUE, message = FALSE} +core_mappings <- c("AE", "COUNTRY", "DATACHG", "DATAENT", "ENROLL", "LB", "PK", + "PD", "QUERY", "STUDY", "STUDCOMP", "SDRGCOMP", "SITE", "SUBJ") + +lSource <- gsm.core::lSource + +# Step 0 - Data Ingestion - standardize tables/columns names +mappings_wf <- MakeWorkflowList(strPath = "workflow/1_mappings", + strNames = core_mappings, + strPackage = "gsm.mapping") +mappings_spec <- CombineSpecs(mappings_wf) +lRaw <- Ingest(lSource, mappings_spec) + +# Step 1 - Create Mapped Data Layer - filter, aggregate and join raw data to create mapped data layer +mapped <- RunWorkflows(mappings_wf, lRaw) + +# Step 2 - Create Metrics - calculate metrics using mapped data +metrics_wf <- MakeWorkflowList(strPath = "workflow/2_metrics", strNames = "kri", strPackage = "gsm.kri") +lAnalysis <- RunWorkflows(metrics_wf, mapped) +``` + + +### Step 1 - Create Reporting Model Data Frames + +With all necessary inputs to the reporting model created, we can move on to generate the reporting data model data frames. The data frames created are as follows: + +1. `dfGroups`: Group-level metadata dictionary. Created by passing CTMS site and study data to `MakeLongMeta()`. +2. `dfMetrics`: Metric-specific metadata for use in charts and reporting. Created by passing an `lWorkflow` object to `MakeMetric()`. +3. `dfResults`: A stacked summary of analysis pipeline output. Created by passing a list of + results returned by `Summarize()` to `BindResults()`. +4. 
`dfBounds`: Set of predicted percentages/rates and upper- and lower-bounds across the full range of sample sizes/total exposure values for reporting. Created by passing `dfResults` and `dfMetrics` to `MakeBounds()`. + +For more details on any of these tables, see `vignette("DataModel")`. + +The following sub-steps will dive into the creation and structure of each of these tables. Sample data for each of these tables can be found in `{gsm.core}` as `reportingGroups`, `reportingMetrics`, `reportingResults` and `reportingBounds`. These sample tables are used throughout the package in examples and documentation. + + +#### Step 1.1 - Transform CTMS data into `dfGroups` data frame + +The `dfGroups` data frame is critical to providing site-, study- and country-level information in the final report. This table is based on CTMS data and the mapped `Mapped_SUBJ` data frame created in the Analysis workflow. Creating this table requires the creation of 5 smaller tables that summarize the data at each group level using `RunQuery()` and `MakeLongMeta()`. These small tables are then bound together to create `dfGroups`. 
+ +```{r include = TRUE, message = FALSE} +#Transform CTMS Site and Study Level data +dfCTMSSite <- gsm.core::RunQuery(df = lSource$Raw_SITE, + strQuery = "SELECT pi_number as GroupID, site_status as Status, pi_first_name as InvestigatorFirstName, pi_last_name as InvestigatorLastName, city as City, state as State, country as Country, * FROM df") |> + gsm.mapping::MakeLongMeta(strGroupLevel = 'Site') + +dfCTMSStudy <- gsm.core::RunQuery(df = lSource$Raw_STUDY, + strQuery = "SELECT protocol_number as GroupID, status as Status, * FROM df") |> + gsm.mapping::MakeLongMeta(strGroupLevel = 'Study') + +# Get Participant and Site counts for Country, Site and Study +dfSiteCounts <- gsm.core::RunQuery(df = mapped$Mapped_SUBJ, + strQuery = "SELECT invid as GroupID, COUNT(DISTINCT subjid) as ParticipantCount, COUNT(DISTINCT invid) as SiteCount FROM df GROUP BY invid") |> + gsm.mapping::MakeLongMeta(strGroupLevel = "Site") + +dfStudyCounts <- gsm.core::RunQuery(df = mapped$Mapped_SUBJ, + strQuery = "SELECT studyid as GroupID, COUNT(DISTINCT subjid) as ParticipantCount, COUNT(DISTINCT invid) as SiteCount FROM df GROUP BY studyid") |> + gsm.mapping::MakeLongMeta(strGroupLevel = "Study") + +dfCountryCounts <- gsm.core::RunQuery(df = mapped$Mapped_SUBJ, + strQuery = "SELECT country as GroupID, COUNT(DISTINCT subjid) as ParticipantCount, COUNT(DISTINCT invid) as SiteCount FROM df GROUP BY country") |> + gsm.mapping::MakeLongMeta(strGroupLevel = "Country") + + +# Combine CTMS and Counts data as dfGroups +dfGroups <- dplyr::bind_rows(SiteCounts = dfSiteCounts, + StudyCounts = dfStudyCounts, + CountryCounts = dfCountryCounts, + Site = dfCTMSSite, + Study = dfCTMSStudy) +``` + +The resulting `dfGroups` dataframe contains the following columns: + +- `GroupID`: Group Identifier +- `GroupLevel`: Type of Group specified in `GroupID` (Country, Site, Study) +- `Param`: Parameter Name (e.g. "Status") +- `Value`: Parameter Value (e.g. 
"Active") + +A more detailed explanation of the `Param`s for each group level can be found in `vignette("DataModel")`. + +```{r, echo = FALSE, warning = FALSE} +datatable(dfGroups) +``` + +#### Step 1.2 - Create `dfMetrics` Metadata + +The `dfMetrics` table contains the metadata for each of the KRIs in the report. This information comes from the `meta` section of the metric workflows, `metrics_wf` defined in Step 0. Using this workflow information as the input, `MakeMetric()` is used to produce a data frame with one row per metric. + + +```{r include = TRUE} +dfMetrics <- gsm.reporting::MakeMetric(lWorkflows = metrics_wf) +``` + +The resulting `dfMetrics` dataframe contains the following columns: + + - `File`: The yaml file for workflow + - `MetricID`: ID for the Metric + - `Group`: The group type for the metric (e.g. "Site") + - `Abbreviation`: Abbreviation for the metric + - `Metric`: Name of the metric + - `Numerator`: Data source for the Numerator + - `Denominator`: Data source for the Denominator + - `Model`: Model used to calculate metric + - `Score`: Type of Score reported + - `Threshold`: Thresholds to be used for bounds and flags + + +```{r, echo = FALSE, warning = FALSE} +datatable(dfMetrics) +``` + +#### Step 1.3 - Stack `dfSummary` data into `dfResults` + +The reporting workflow requires that all metrics are stacked into a single data frame, `dfResults`. This stacked data frame is created by feeding the `lAnalysis` list from the analysis workflow into `BindResults()` along with the snapshot date and the study id. 
+ +```{r include = TRUE} +dfResults <- gsm.reporting::BindResults(lAnalysis = lAnalysis, + strName = "Analysis_Summary", + dSnapshotDate = Sys.Date(), + strStudyID = "ABC-123") +``` + +The resulting `dfResults` data frame contains the following columns: + + - `GroupID`: Group Identifier + - `GroupLevel`: Type of Group specified in `GroupID` (Country, Site, Study) + - `Numerator`: The calculated numerator value + - `Denominator`: The calculated denominator value + - `Metric`: The calculated rate/metric value + - `Score`: The calculated metric score + - `Flag`: The calculated flag + - `MetricID`: The Metric ID + - `StudyID`: The Study ID + - `SnapshotDate`: The Date of the snapshot + + +```{r, echo = FALSE, warning = FALSE} +datatable(dfResults) %>% +formatRound(columns = "Metric", digits = 3) +``` + +#### Step 1.4 - Create `dfBounds` for Confidence Intervals + +Several of the charts created for the KRI reports use confidence intervals and bounds to delineate the observations based on the flag they receive (no flag, amber or red). In order to create the data frame that contains the information about these boundaries (`dfBounds`), `dfResults` and `dfMetrics` are fed into the `MakeBounds()` function. The `MakeBounds()` function is a wrapper around the `Analyze_*_PredictBounds()` functions that create the bounds based on the model used to estimate the metric (Normal Approximation or Poisson). 
+ +```{r include = TRUE} +dfBounds <- gsm.reporting::MakeBounds(dfResults = dfResults, + dfMetrics = dfMetrics) +``` + +The resulting `dfBounds` data frame contains the following columns: + + - `Threshold`: The number of standard deviations that the upper and lower bounds are based on + - `Denominator`: The calculated denominator value + - `LogDenominator`: The calculated log denominator value + - `Numerator`: The calculated numerator value + - `Metric`: The calculated rate/metric value + - `MetricID`: The Metric ID + - `StudyID`: The Study ID + - `SnapshotDate`: The Date of the snapshot + + +```{r, echo = FALSE, warning = FALSE} +datatable(dfBounds) %>% +formatRound(columns = "Metric", digits = 3) +``` + +### Step 2 - Create Visualizations + +Now that all of the data frames in the reporting data model have been created, we can create the charts that display this data in a useful and easily interpreted way. All four of the data frames created in Step 1 are fed into the `MakeCharts()` function to create all relevant charts given the input data. `MakeCharts()` is a wrapper around several helper functions that generate each static visualization and JS widget individually. Appendix 1 goes into more detail about each of these individual functions. + +```{r include = TRUE, warning = FALSE, message = FALSE} +lCharts <- gsm.kri::MakeCharts(dfResults = dfResults, + dfGroups = dfGroups, + dfBounds = dfBounds, + dfMetrics = dfMetrics) +``` + +The output of `MakeCharts` is a list containing the following charts: + - `scatterJS`: A scatter plot using JavaScript. + - `scatter`: A scatter plot using ggplot2. + - `barMetricJS`: A bar chart using JavaScript with metric on the y-axis. + - `barScoreJS`: A bar chart using JavaScript with score on the y-axis. + - `barMetric`: A bar chart using ggplot2 with metric on the y-axis. + - `barScore`: A bar chart using ggplot2 with score on the y-axis. 
+ - `timeSeriesContinuousScoreJS`: A time series chart using JavaScript with score on the y-axis. + - `timeSeriesContinuousMetricJS`: A time series chart using JavaScript with metric on the y-axis. + - `timeSeriesContinuousNumeratorJS`: A time series chart using JavaScript with numerator on the y-axis. + +If the data only contains one snapshot date, then the `timeseries` charts will not be created. + +Below are the static and interactive versions of the scatter plot for the AE KRI: + +```{r fig.height=6, fig.width=8, warning=FALSE} +lCharts$Analysis_kri0001$scatter + +lCharts$Analysis_kri0001$scatterJS +``` + + +### Step 3 - Generate Report + +All of the components needed to generate the HTML report for the study we are working on have now been created. In order to generate this report and save it locally, simply feed `lCharts`, `dfResults`, `dfGroups`, `dfMetrics` and (optionally) an absolute directory path and file to which the report will be saved (`strOutputDir` and `strOutputFile`, respectively) into `Report_KRI()` and the HTML output will be knit from the `Report_KRI.Rmd` template. All intermediate files from the knitting process will be saved in a temporary folder. + +```{r eval = FALSE, include = TRUE} +lReport <- gsm.kri::Report_KRI(lCharts = lCharts, + dfResults = dfResults, + dfGroups = dfGroups, + dfMetrics = dfMetrics, + strOutputFile = "test_kri_report.html") +``` + +Below, you will see a screenshot from the beginning of the report. All charts for all metrics that were included throughout the analysis and reporting workflows will be included in this report. + +![](report_screenshot.png){width="100%"} + +------------------------------------------------------------------- + +## Using YAML Workflows to generate reports + +While it is helpful to understand how each step of this process works, we have provided a series of YAML workflow files that make running reports on multiple KRIs easy to run and to automate. + +Here, you will see how to run your workflows. 
The general approach is to run the analytics workflow(s), followed by the reporting workflow `data_reporting.yaml` followed by the charts and reports workflow `reports.yaml`. This allows the user to examine the output of each workflow individually before moving on to the next step. + +### Option 1 - Run All Workflows Separately + +```{r eval = FALSE, include = TRUE} +# Step 1 - Create Mapped Data - filter/map raw data +# Source Data +core_mappings <- c("AE", "COUNTRY", "DATACHG", "DATAENT", "ENROLL", "LB", "PK", + "PD", "QUERY", "STUDY", "STUDCOMP", "SDRGCOMP", "SITE", "SUBJ") + +lSource <- gsm.core::lSource + +# Step 0 - Data Ingestion - standardize tables/columns names +mappings_wf <- gsm.core::MakeWorkflowList(strNames = core_mappings, strPath = "workflow/1_mappings", strPackage = "gsm.mapping") +mappings_spec <- gsm.mapping::CombineSpecs(mappings_wf) +lRaw <- gsm.mapping::Ingest(lSource, mappings_spec) + +# Step 1 - Create Mapped Data Layer - filter, aggregate and join raw data to create mapped data layer +mapped <- gsm.core::RunWorkflows(mappings_wf, lRaw) + +# Step 2 - Create Metrics - calculate metrics using mapped data +metrics_wf <- gsm.core::MakeWorkflowList(strPath = "workflow/2_metrics", strPackage = "gsm.kri") +analyzed <- gsm.core::RunWorkflows(metrics_wf, mapped) + +# Step 3 - Create Reporting Layer - create reports using metrics data +reporting_wf <- gsm.core::MakeWorkflowList(strPath = "workflow/3_reporting", strPackage = "gsm.reporting") +reporting <- gsm.core::RunWorkflows(reporting_wf, c(mapped, list(lAnalyzed = analyzed, lWorkflows = metrics_wf))) + +# Step 4 - Create KRI Report - create KRI report using reporting data +module_wf <- gsm.core::MakeWorkflowList(strPath = "workflow/4_modules", strPackage = "gsm.kri") +lReports <- gsm.core::RunWorkflows(module_wf, reporting) +``` + + +---------------------------------------------------------------- + +### Recap - Reporting Workflow + + - `dfGroups` created from CTMS data using `RunQuery()`, 
`MakeLongMeta()` and `bind_rows()` + - `dfMetrics` created from `lWorkflow` using `MakeMetric()` + - `dfResults` created from `lAnalysis$dfSummary` using `BindResults()` + - `dfBounds` created from `dfResults` using `MakeBounds()` + - List of all charts and tables (`lCharts`) created from `dfResults`, `dfBounds`, `dfMetrics` and `dfGroups` using `MakeCharts()` + - Report generated from `lCharts`, `dfResults`, `dfMetrics` and `dfGroups` using `Report_KRI()` + +----------------------------------------------------------------- + +# Appendix 1 - Supporting Functions {#appendix-1} + + +### Mapping Functions + + - `gsm.core::RunQuery()`: Run a SQL query to create new data.frames with filtering and column name specifications. + + +### Visualization Functions + +- `gsm.kri::Visualize_Scatter()`: Creates scatter plot of Total Exposure (in days, on log scale) vs Total Number of Event(s) of Interest (on linear scale). Each data point represents one site. Outliers are plotted in red with the site label attached. This plot is only created when statistical method is **not** defined as `identity`. Chart is called `scatter` in the `lCharts` object. +- `gsm.kri::Visualize_Score()`: Provides a standard visualization for Score or KRI. Charts are called `barScore` or `barMetric` in the `lCharts` object. +- `gsm.kri::Visualize_Metric()`: Creates all available charts and tables for a metric using the data provided. + + +### Widget Functions + +- `gsm.kri::Widget_GroupOverview()`: Creates an interactive table displaying the flag distribution for all groups across all metrics. +- `gsm.kri::Widget_BarChart()`: Creates an interactive bar chart visualization for Score or KRI. Charts are called `barScoreJS` or `barMetricJS` in the `lCharts` object. +- `gsm.kri::Widget_ScatterPlot()`: Creates an interactive scatter plot of Total Exposure (in days, on log scale) vs Total Number of Event(s) of Interest (on linear scale). Each data point represents one site. 
Outliers are plotted in red with the site label attached. Chart is called `scatterJS` in the `lCharts` object. +- `gsm.kri::Widget_TimeSeries()`: Creates an interactive time series scatter plot of the score, metric or numerator. Charts are called `timeSeriesContinuousScoreJS`, `timeSeriesContinuousMetricJS`, or `timeSeriesContinuousNumeratorJS` in the `lCharts` object. + +### Table Functions + +- `gsm.kri::Report_MetricTable()`: Creates a sortable table displaying the flags per group (e.g. Site, Country) for one metric at a time. + + diff --git a/vignettes/articles/ContributorGuidelines.Rmd b/vignettes/articles/ContributorGuidelines.Rmd index 449fa40..84b7d78 100644 --- a/vignettes/articles/ContributorGuidelines.Rmd +++ b/vignettes/articles/ContributorGuidelines.Rmd @@ -1,5 +1,6 @@ --- title: "Contributor Guidelines" +description: "This page outlines the development process for `{gsm}` packages, including how to contribute by filing issues, bug reports, and submitting code via a Pull Request." output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Contributor Guidelines} @@ -8,7 +9,7 @@ vignette: > --- ```{r setup, include = FALSE} -library(gsm) +library(gsm.core) knitr::opts_chunk$set( collapse = TRUE, comment = "#>" @@ -17,11 +18,11 @@ knitr::opts_chunk$set( # Introduction -This page outlines the development process for `{gsm}`, including how to contribute by filing issues, bug reports, and submitting code via a Pull Request. +This page outlines the development process for `{gsm}` packages, including how to contribute by filing issues, bug reports, and submitting code via a Pull Request. ## Prerequisites -Before contributing code via a Pull Request, make sure to file an [issue](https://github.com/Gilead-BioStats/gsm/issues/new/choose) using one of the pre-specified issue templates. 
Choose the template that best categorizes what you aim to contribute, which generally can be one of the following: +Before contributing code via a Pull Request, make sure to file an [issue](https://github.com/Gilead-BioStats/gsm.core/issues/new/choose) using one of the pre-specified issue templates. Choose the template that best categorizes what you aim to contribute, which generally can be one of the following: - Bugfix Issue: Fix a bug in the code - Feature Issue: Develop a new feature @@ -31,7 +32,7 @@ Someone from the development team will decide if the issue is in scope. If so, t The issue templates provide comments/prompts to help ensure that all relevant information is included. When submitting issues for bug fixes or specific feature requests, it is often helpful to provide a minimal [reprex](https://www.tidyverse.org/help/#reprex), or reproducible example, to help the core developers visualize the issue. -Suggestions or other input that might not warrant formal submission of an issue can be filed under [discussions](https://github.com/Gilead-BioStats/gsm/discussions), which can help facilitate discourse of specific use-cases or requests. +Suggestions or other input that might not warrant formal submission of an issue can be filed under [discussions](https://github.com/Gilead-BioStats/gsm.core/discussions), which can help facilitate discourse of specific use-cases or requests. ## Branches @@ -46,7 +47,7 @@ The core branches that are used in this repository are: All code development takes place in `fix` branches. This section provides general guidance about this process flow. A detailed step-by-step workflow for code development in `fix` branches can be found in the first section of [Appendix 1](#fix-branch-workflow) below. -Once an issue is filed and delegated to a core developer, a `fix` branch will be opened, which is where all package development related to that issue will be conducted. 
Each `fix` branch should be linked to one or more of the filed GitHub [issue(s)](https://github.com/Gilead-BioStats/gsm/issues). The issue(s) will be referenced in the naming of the `fix` branch. For example, a branch named `fix-111` addresses issue #111. Tasks related to documentation, testing, and/or qualification may also use `fix` branches and associated issues. +Once an issue is filed and delegated to a core developer, a `fix` branch will be opened, which is where all package development related to that issue will be conducted. Each `fix` branch should be linked to one or more of the filed GitHub [issue(s)](https://github.com/Gilead-BioStats/gsm.core/issues). The issue(s) will be referenced in the naming of the `fix` branch. For example, a branch named `fix-111` addresses issue #111. Tasks related to documentation, testing, and/or qualification may also use `fix` branches and associated issues. In addition to the above, please also use the following general guidelines when creating a Pull Request: @@ -78,7 +79,7 @@ Code developers for `{gsm}` use the [tidyverse style guide](https://style.tidyve ## `fix` Branch Workflow {#fix-branch-workflow} 1. Create issue(s) defining addition(s) and/or revision(s): - - Select the appropriate [template](https://github.com/Gilead-BioStats/gsm/issues/new/choose) to use (should be one of the following): + - Select the appropriate [template](https://github.com/Gilead-BioStats/gsm.core/issues/new/choose) to use (should be one of the following): - `Bugfix Issue` - `Feature Issue` - `QC Issue` @@ -98,12 +99,10 @@ Code developers for `{gsm}` use the [tidyverse style guide](https://style.tidyve ## `release` Branch Workflow {#release-branch-workflow} 1. Release Owner creates `release` branch from `dev` branch. - - The `release` branch should be named according to the version of `{gsm}` being released (e.g., `release-v1.2.0`) using [semantic versioning](https://semver.org/). 
+ - The `release` branch should be named according to the version of the package being released (e.g., `release-v1.2.0`) using [semantic versioning](https://semver.org/). - If a release branch is already created, make sure that it is synced with the current `dev` branch. 2. Release Owner prepares the release for QC by performing the following steps and pushing updates to the `release` branch: - Confirm that the version in the `DESCRIPTION` file is up to date. - - **After** the version in the `DESCRIPTION` file is updated, run `gsm::UpdateGSMVersion()` to update metadata that includes the `{gsm}` version number. - - Run `styler` using the script from the [style guide](#style-guide) above (or by running `gutil::style_code()`) and commit any updates. - Update `NEWS.md` with a summary of the revisions/additions in the release. Keep any information from previous releases to maintain traceability through versions. - Ensure that the qualification specifications spreadsheet is up-to-date and accurate. If there have been any changes/updates to qualification tests, reach out to the qualification developer to update any necessary files. @@ -114,12 +113,12 @@ Code developers for `{gsm}` use the [tidyverse style guide](https://style.tidyve - Build site using `pkgdown::build_site()`. Check that all examples are displayed correctly and that all new functions occur on the Reference page. - Open a clean R session. Run `devtools::install()` and then `devtools::check()` locally and confirm that there are no issues/conflicts. 3. Release Owner creates Pull Request from the `release` branch to the `main` branch: - - Use the [release Pull Request template](https://github.com/Gilead-BioStats/gsm/blob/dev/.github/PULL_REQUEST_TEMPLATE/release.md) by adding `?template=release.md` to the URL when creating the Pull Request. The user can also click the link, then click `Raw`, and copy/paste the displayed Markdown into the Pull Request. 
+ - Use the [release Pull Request template](https://github.com/Gilead-BioStats/gsm.core/blob/dev/.github/PULL_REQUEST_TEMPLATE/release.md) by adding `?template=release.md` to the URL when creating the Pull Request. The user can also click the link, then click `Raw`, and copy/paste the displayed Markdown into the Pull Request. - Assign Pull Request to self. - Request QC review(s). - Assign milestone. - Complete Risk Assessments for each Assessment/Feature added as outlined in the Pull Request template. - - Create comments in the Pull Request with a unique [QC checklist](#appendix-2---qc-checklist) for each selected Assessment/Feature (See [example for v0.1.0](https://github.com/Gilead-BioStats/gsm/pull/194)). + - Create comments in the Pull Request with a unique [QC checklist](#appendix-2---qc-checklist) for each selected Assessment/Feature (See [example for v0.1.0](https://github.com/Gilead-BioStats/gsm.core/pull/194)). 4. QC Reviewer(s) conduct(s) review by: - Completing all QC checklists in the Pull Request. - Ensuring all GitHub Actions on the Pull Request to the `main` branch are passing. @@ -200,7 +199,7 @@ This QC checklist is to be used as part of the Development and Release Workflows # Appendix 3 - Continuous Integration with GitHub Actions {#github-action-workflow} -GitHub Actions are used in `{gsm}` to automate processes and ensure all code and documentation is created consistently and documented thoroughly. +GitHub Actions are used in all `{gsm}` packages to automate processes and ensure all code and documentation is created consistently and documented thoroughly. ## Merges to `dev` Branch @@ -211,7 +210,7 @@ GitHub Actions are used in `{gsm}` to automate processes and ensure all code and - Builds Assessment Specification tables from function documentation - Outputs are added to `man`/`.md` and any changes are committed to the compare branch or the triggering Pull Request. 
- Test Coverage (`test-coverage`): - - Uses `{covr}` to check the package coverage of `{gsm}`. + - Uses `{covr}` to check the package coverage. - Qualification Check (`qualification-check-dev`): - Runs the qualification tests but will not fail if any of the tests do not pass. Developers should review this check when changes that might need updates to qualification are done. @@ -221,6 +220,6 @@ GitHub Actions are used in `{gsm}` to automate processes and ensure all code and - Basic R CMD check which can be run using `rcmdcheck::rcmdcheck()` - Provides an additional check for the ability to build the `pkgdown` reference index and ensure that all functions are documented correctly. The check will also run all qualification tests to ensure that the release is fully qualified. This check will run on `ubuntu-latest` and on R version 4.1.3. Additionally, it will be run on the latest R release version on `windows-latest`, `macOS-latest`, and `ubuntu-latest`. - `pkgdown`: - - Builds the [pkgdown site](https://gilead-biostats.github.io/gsm/) for `{gsm}`. + - Builds the [pkgdown site](https://gilead-biostats.github.io/gsm.core/) for the relevant package (`{gsm.core}` in this case). - Qualification Report (`qualification-report`): - Builds the qualification vignette as an attached artifact to the Pull Request. This should be reviewed by the Pull Request Owner for completeness and correctness to ensure that the artifact added to the release is correct. 
diff --git a/vignettes/articles/KRIMethod.Rmd b/vignettes/articles/KRIMethod.Rmd index d0c3a05..e2bb25e 100644 --- a/vignettes/articles/KRIMethod.Rmd +++ b/vignettes/articles/KRIMethod.Rmd @@ -8,7 +8,7 @@ vignette: > --- ```{r setup, include = FALSE} -library(gsm) +library(gsm.core) knitr::opts_chunk$set( collapse = TRUE, comment = "#>" diff --git a/vignettes/gsmExtensions.Rmd b/vignettes/gsmExtensions.Rmd index 8d02fa5..eaa38b1 100644 --- a/vignettes/gsmExtensions.Rmd +++ b/vignettes/gsmExtensions.Rmd @@ -1,5 +1,6 @@ --- title: "gsm Extensions" +description: "This vignette describes how to extend {gsm.core} by creating new 'modules', including metrics, reports and shiny apps that can be run using the standard gsm pipeline." output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{gsm Extensions} @@ -16,9 +17,9 @@ knitr::opts_chunk$set( # Overview -This vignette describes how to extend `gsm` by creating new "modules", including metrics, reports and shiny apps that can be run using the standard `gsm` pipeline described in these vignettes (`vignette("DataAnalysis")`, `vignette("DataReporting")`). As shown in the `vignette("DataAnalysis")`, the `gsm` data pipeline can be used to capture a monitoring 'snapshot' for a study that includes a variety of "modules" including [metrics](https://github.com/Gilead-BioStats/gsm/tree/dev/inst/workflow/metrics/kri0001.yaml) and [reports](https://github.com/Gilead-BioStats/gsm/blob/dev/inst/workflow/reports/report_kri_site.yaml). Some core modules are included in the `gsm` package, while others can be added as extensions. +This vignette describes how to extend `{gsm.core}` by creating new "modules", including metrics, reports and shiny apps that can be run using the standard `gsm` pipeline described in these vignettes (`vignette("DataAnalysis")` and [DataReporting](https://gilead-biostats.github.io/gsm.reporting/articles/DataReporting.html)). 
As shown in the `vignette("DataAnalysis")`, the existing `gsm` data pipeline can be used to capture a monitoring 'snapshot' for a study that includes a variety of "modules" including [metrics](https://github.com/Gilead-BioStats/gsm.kri/tree/dev/inst/workflow/2_metrics/kri0001.yaml) and [reports](https://github.com/Gilead-BioStats/gsm.kri/blob/dev/inst/workflow/4_modules/report_kri_site.yaml). -This vignette provide detailed specifications for creating new modules, a description of the directory structure for the yaml workflows that comprise a module pipeline, and links to resources that can be used to configure study-level gsm pipelines that utilize these `gsm` extensions. +This vignette provides detailed specifications for creating new modules, a description of the directory structure for the yaml workflows that comprise a module pipeline, and links to resources that can be used to configure study-level gsm pipelines that utilize these extensions. # Module Configuration @@ -34,8 +35,8 @@ Detailed specifications for each of these sections are provided below. Here are links to several sample module configuration files: -- [12 Standard gsm KRIs](https://github.com/Gilead-BioStats/gsm/tree/dev/inst/workflow/metrics) (e.g. [Adverse Event KRI Metric](https://github.com/Gilead-BioStats/gsm/tree/dev/inst/workflow/metrics/kri0001.yaml)) -- [Site-](https://github.com/Gilead-BioStats/gsm/blob/dev/inst/workflow/reports/report_kri_site.yaml) and [Country-level KRI Report](https://github.com/Gilead-BioStats/gsm/blob/dev/inst/workflow/reports/report_kri_country.yaml) +- [12 Standard gsm KRIs](https://github.com/Gilead-BioStats/gsm.kri/tree/dev/inst/workflow/2_metrics) (e.g. 
[Adverse Event KRI Metric](https://github.com/Gilead-BioStats/gsm.kri/tree/dev/inst/workflow/2_metrics/kri0001.yaml)) +- [Site-](https://github.com/Gilead-BioStats/gsm.kri/blob/dev/inst/workflow/4_modules/report_kri_site.yaml) and [Country-level KRI Report](https://github.com/Gilead-BioStats/gsm.kri/blob/dev/inst/workflow/4_modules/report_kri_country.yaml) ## `meta` Specification @@ -49,7 +50,7 @@ The `meta` section of a workflow YAML provides key metadata describing the modul - `Details`: *optional* A more detailed description of the module specified in the workflow. - `Repo`: Package repo and version. Should be compatible with the `repo` parameter in `remotes::install_github()`. - `Status`: The validation status of the reporting output. Valid values: - - `Qualified`: Output has been qualified via our qualification process specified [here](https://gilead-biostats.github.io/gsm/articles/QualificationWorkflow.html). + - `Qualified`: Output has been qualified via our qualification process specified [here](https://gilead-biostats.github.io/gsm.qc/articles/QualificationWorkflow.html). - `Pilot`: Output is being used by pilot studies and is maintained in a package repository. - `Prototype`: Output is created using custom scripts on an ad-hoc basis. @@ -59,7 +60,7 @@ Additional `meta` header required fields for **Modules**: - `Output`: The output of the workflow, including format. Each workflow should only produce a single reporting output. - `ExampleURL`: Location of a sample report. For html reports, this is typically a page on the pkgdown site (ending with "/{ModuleID}.html"), or a sample app deployed on [shinyapps.io](https://shinyapps.io). -Additional `meta` header required fields for `{gsm}` **metrics**: +Additional `meta` header required fields for `{gsm.kri}` **metrics**: - `GroupLevel`: The level at which the metric is calculated. Common values: `Site`, `Country`. - `Abbreviation`: Abbreviation of the metric. 
@@ -101,7 +102,7 @@ The `spec` section of the workflow YAML is formatted as a list of data tables, w ### `spec` example: Metric Module -Metric `spec`s are typically pulled from the `mapped` data layer. For example, the `spec` section for the [AE KRI metric](https://github.com/Gilead-BioStats/gsm/tree/dev/inst/workflow/metrics/kri0001.yaml) is: +Metric `spec`s are typically pulled from the `mapped` data layer. For example, the `spec` section for the [AE KRI metric](https://github.com/Gilead-BioStats/gsm.kri/tree/dev/inst/workflow/2_metrics/kri0001.yaml) is: ``` spec: @@ -121,7 +122,7 @@ So, in summary, the AE KRI metric requires two data tables, `Mapped_AE` and `Map ### `spec` examples: Report Module -Report modules most often pull data from the `Reporting` data layer. For example, the [Site-level KRI report](https://github.com/Gilead-BioStats/gsm/blob/dev/inst/workflow/reports/report_kri_site.yaml) has the following `spec`: +Report modules most often pull data from the `Reporting` data layer. For example, the [Site-level KRI report](https://github.com/Gilead-BioStats/gsm.kri/blob/dev/inst/workflow/4_modules/report_kri_site.yaml) has the following `spec`: ``` spec: @@ -132,7 +133,7 @@ spec: Note that the `_all` key word is used to specify that all standard columns from the `Reporting_Results` data table are expected and that the table required - without it, the report can't run. The other `Reporting` tables are used to enhance the report, but are not required, and thus not included in the spec. -The `Mapped` data layer is also available for use in reports and apps. Most typically, mapped data is used to drill down from high-level metric findings (e.g. "Site 5 has an elevated AE rate relative to other studies") to site- or participant- level details (e.g. "Participant 00016 from Site 5 had 5 AEs and 3 SAEs reported in the last 3 months."). For example, the [Deep Dive app]() includes both Reporting and Mapped data in its `spec`. 
Here is a representative excerpt from the `spec`: +The `Mapped` data layer is also available for use in reports and apps. Most typically, mapped data is used to drill down from high-level metric findings (e.g. "Site 5 has an elevated AE rate relative to other studies") to site- or participant- level details (e.g. "Participant 00016 from Site 5 had 5 AEs and 3 SAEs reported in the last 3 months."). For example, the [Deep Dive app](https://openrbqm.shinyapps.io/gsm-app/) includes both Reporting and Mapped data in its `spec`. Here is a representative excerpt from the `spec`: ``` spec: @@ -153,32 +154,32 @@ spec: Finally, each module yaml configuration file should have a `steps` property that describes in detail how the module is run. The `steps` section is a list of functions that are run in sequence to produce the final output. Each item in `steps` has the following properties: -- `name`: The name of the function to be run. This must be a function that is available in `{gsm}` package or in a package that is listed in the `repo` section of the `meta` header. +- `name`: The name of the function to be run. This is typically a function that is available in one of the `{gsm}` packages or in a package that is listed in the `repo` section of the `meta` header. - `output`: The name of the output of the function. This is the name of the data table that is created by the function. - `params`: A list of parameters that are passed to the function. The parameters are specific to the function that is being run. See below for more details on how to specify parameters for each function. -**Note**: It is important to note that the default behavior of the `RunWorkflow()` and `RunWorkflows()` functions is to return the *last* output in the steps section of the workflow.
therefore, each yaml file- regardless of which directory it is in- should only produce one output, whether that be a data table, list, html output, deployed shiny app, or any other object needed to produce the module output. +**Note**: It is important to note that the default behavior of the `gsm.core::RunWorkflow()` and `gsm.core::RunWorkflows()` functions is to return the *last* output in the steps section of the workflow. Therefore, each yaml file, regardless of which directory it is in, should only produce one output, whether that be a data table, list, html output, deployed shiny app, or any other object needed to produce the module output. -The `steps` is the most complex part of the module configuration and will vary greatly depending on the module type and the specific requirements of the module. `gsm` provides several functions that allow for module yaml files to be run in a standard way. See `?gsm::RunWorkflow()` for more details. +The `steps` section is the most complex part of the module configuration and will vary greatly depending on the module type and the specific requirements of the module. `gsm.core` provides several functions that allow for module yaml files to be run in a standard way. See `?gsm.core::RunWorkflow()` for more details. ### `steps[]$params` Specification -After processing the YAML `meta` and `spec` sections, `gsm::RunWorkflow()` calls `gsm::RunStep()` for each step in the `steps` section of the YAML. The `params` section of each step is passed to `RunStep()` as a list of parameters along with a copy of the metadata header (`lMeta`) and any data (`lData`). `RunStep()` then parses the list of `params` by passing data from `lMeta` and `lData` when appropriate - see `?RunStep` for a detailed of how parameter values are populated. Finally, the parsed parameters are passed to the function specified in the `name` field of the step.
+After processing the YAML `meta` and `spec` sections, `gsm.core::RunWorkflow()` calls `gsm.core::RunStep()` for each step in the `steps` section of the YAML. The `params` section of each step is passed to `gsm.core::RunStep()` as a list of parameters along with a copy of the metadata header (`lMeta`) and any data (`lData`). `gsm.core::RunStep()` then parses the list of `params` by passing data from `lMeta` and `lData` when appropriate - see `?gsm.core::RunStep` for a detailed description of how parameter values are populated. Finally, the parsed parameters are passed to the function specified in the `name` field of the step. ### `steps` examples #### `Metric` steps example -In the example below, the steps to produce the AE analysis output is specified. Here, `Threshold`, `GroupLevel`, `Type` and `nMinDenominator` are specified in the `meta` section of the workflow, and would be access via the `paramVal` process discussed above. As a default, the output of these steps as run with `RunWorkflows()` would be a list of data tables, as specified in the final `list` step of the workflow. +In the example below, the steps to produce the AE analysis output are specified. Here, `Threshold`, `GroupLevel`, `Type` and `nMinDenominator` are specified in the `meta` section of the workflow, and would be accessed via the `paramVal` process discussed above. By default, the output of these steps as run with `gsm.core::RunWorkflows()` would be a list of data tables, as specified in the final `list` step of the workflow.
``` steps: - - name: ParseThreshold + - name: gsm.core::ParseThreshold output: vThreshold params: strThreshold: Threshold - - name: Input_Rate + - name: gsm.core::Input_Rate output: Analysis_Input params: dfSubjects: Mapped_SUBJ @@ -190,21 +191,21 @@ steps: strNumeratorMethod: Count strDenominatorMethod: Sum strDenominatorCol: timeonstudy - - name: Transform_Rate + - name: gsm.core::Transform_Rate output: Analysis_Transformed params: dfInput: Analysis_Input - - name: Analyze_NormalApprox + - name: gsm.core::Analyze_NormalApprox output: Analysis_Analyzed params: dfTransformed: Analysis_Transformed strType: AnalysisType - - name: Flag_NormalApprox + - name: gsm.core::Flag output: Analysis_Flagged params: dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold - - name: Summarize + - name: gsm.core::Summarize output: Analysis_Summary params: dfFlagged: Analysis_Flagged @@ -222,28 +223,28 @@ steps: #### `Report` steps example -In this example, the steps to produce a site-level KRI report is displayed. Here, the only inputs are the `Reporting_*` data, which goes through a simple filtering process via `RunQuery` before the Charts and Report are created in the following two functions +In this example, the steps to produce a site-level KRI report are displayed.
Here, the only inputs are the `Reporting_*` data, which goes through a simple filtering process via `gsm.core::RunQuery` before the Charts and Report are created in the following two functions. ``` steps: - - name: RunQuery + - name: gsm.core::RunQuery output: Reporting_Results_Site params: df: Reporting_Results strQuery: "SELECT * FROM df WHERE GroupLevel == 'Site'" - - name: RunQuery + - name: gsm.core::RunQuery output: Reporting_Metrics_Site params: df: Reporting_Metrics strQuery: "SELECT * FROM df WHERE GroupLevel == 'Site'" - - name: MakeCharts + - name: gsm.kri::MakeCharts output: lCharts_Site params: dfResults: Reporting_Results_Site dfGroups: Reporting_Groups dfBounds: Reporting_Bounds dfMetrics: Reporting_Metrics_Site - - name: Report_KRI + - name: gsm.kri::Report_KRI output: lReport params: lCharts: lCharts_Site @@ -259,7 +260,7 @@ Each extension that produces report(s) will have a `workflow` directory in the ` ### `/1_mappings` -The mappings folder contains all of the mappings from `Raw_*` data to `Mapped_*` data. Each file within this directory is to be named for the data table it is creating, minus the `Mapped_` suffix. The yamls will contain the three required sections, which are discussed in detail in the `Module Configuration` section above. The yamls in this folder will be combined via `CombineSpecs()` to create a master spec that defines all necessary tables and columns for the module(s) in this package. +The mappings folder in the `{gsm.mapping}` package contains all of the mappings from `Raw_*` data to `Mapped_*` data. Each file within this directory is to be named for the data table it is creating, minus the `Mapped_` prefix. The yamls will contain the three required sections, which are discussed in detail in the `Module Configuration` section above. The yamls in this folder will be combined via `gsm.mapping::CombineSpecs()` to create a master spec that defines all necessary tables and columns for the module(s) in this package.
Below are two examples of these mapping yaml files- the first which requires no transformations, and is very simple, and the second which requires multiple steps to produce the desired mapped data. @@ -306,13 +307,13 @@ spec: steps: # Merge [ subjid ] onto EDC domains. - output: Temp_SubjectLookup - name: select + name: dplyr::select params: .data: Mapped_SUBJ subjid: subjid subject_nsv: subject_nsv - output: Mapped_DATACHG - name: left_join + name: dplyr::left_join params: x: Raw_DATACHG "y": Temp_SubjectLookup @@ -321,7 +322,7 @@ steps: ### `/2_metrics` -The metrics directory contains all of the workflows that perform analysis steps, converting mapped data into metrics that are displayed in a report. In the case of `{gsm}`, these metrics are the 12 Key Risk Indicators, calculated at both the site- and country-level, that are discussed in the Data Analysis Step-by-Step Vignette. Each yaml in this file produces a list of analysis data tables that capture the formatted input table, the transformed table, the flagged table, and the summary table. In general, these yamls should at least provide a summary table that contains statistics about the metric at the specified level of aggregation. +The metrics directory contains all of the workflows that perform analysis steps, converting mapped data into metrics that are displayed in a report. In the case of `{gsm.kri}`, these metrics are the 12 Key Risk Indicators, calculated at both the site- and country-level, that are discussed in the Data Analysis Step-by-Step Vignette. Each yaml in this file produces a list of analysis data tables that capture the formatted input table, the transformed table, the flagged table, and the summary table. In general, these yamls should at least provide a summary table that contains statistics about the metric at the specified level of aggregation. Examples of these yamls can be found above in the `Module Configuration` section, as well as in the (`vignette("DataAnalysis")`) vignette. 
@@ -333,9 +334,9 @@ Examples of these yamls can be found above in the `Module Configuration` section ### `/4_modules` -The modules directory contains the final workflow(s) of the reporting pipeline. These workflows each produce a single output based on the data tables that have been produced in the previous directories. These module workflows will contain all of the necessary meta information, as detailed in the `Module Configuration` section above, along with the data tables required, and steps to produce it, so that `gsm::RunWorkflow()` can take this workflow and produce the module output. +The modules directory contains the final workflow(s) of the reporting pipeline. These workflows each produce a single output based on the data tables that have been produced in the previous directories. These module workflows will contain all of the necessary meta information, as detailed in the `Module Configuration` section above, along with the data tables required, and steps to produce it, so that `gsm.core::RunWorkflow()` can take this workflow and produce the module output. 
-Below is an example of the module yaml workflow for the KRI Site Report in `{gsm}` +Below is an example of the module yaml workflow for the KRI Site Report in `{gsm.kri}` ``` meta: @@ -344,11 +345,11 @@ meta: Output: html Name: Site-Level Key Risk Indicator Report Description: A report summarizing key risk indicators at the site level - Repo: gsm v2.1.0 + Repo: gsm.kri v1.0.0 Status: Qualified Permission: Users Outputs: An html report - ExampleURL: https://gilead-biostats.github.io/gsm/report_kri_site.html + ExampleURL: https://gilead-biostats.github.io/gsm.kri/report_kri_site.html spec: Reporting_Results: _all: @@ -363,24 +364,24 @@ spec: _all: required: true steps: - - name: RunQuery + - name: gsm.core::RunQuery output: Reporting_Results_Site params: df: Reporting_Results strQuery: "SELECT * FROM df WHERE GroupLevel == 'Site'" - - name: RunQuery + - name: gsm.core::RunQuery output: Reporting_Metrics_Site params: df: Reporting_Metrics strQuery: "SELECT * FROM df WHERE GroupLevel == 'Site'" - - name: MakeCharts + - name: gsm.kri::MakeCharts output: lCharts_Site params: dfResults: Reporting_Results_Site dfGroups: Reporting_Groups dfBounds: Reporting_Bounds dfMetrics: Reporting_Metrics_Site - - name: Report_KRI + - name: gsm.kri::Report_KRI output: lReport params: lCharts: lCharts_Site