OHDSI · anthonysena · Apr 8, 2025 · Apr 25, 2025 · May 8, 2025
diff --git a/NAMESPACE b/NAMESPACE
@@ -30,6 +30,7 @@ export(createResultsDataModelSettings)
 export(createResultsExecutionSettings)
 export(execute)
 export(getCdmDatabaseMetaData)
+export(partitionModule)
 export(uploadResults)
 export(zipResults)
 import(CohortGenerator)

diff --git a/R/Module-Characterization.R b/R/Module-Characterization.R
@@ -263,7 +263,130 @@ CharacterizationModule <- R6::R6Class(
         )
       )
       return(specifications)
-    }
+    },
+    #' @description Summarize the uploaded results for the module
+    #' @template resultsConnectionDetails
+    #' @template resultsDataModelSettings
+    summarizeResults = function(resultsConnectionDetails, resultsDataModelSettings) {
+      # initialize checks
+
+      schema <- resultsDataModelSettings$resultsDatabaseSchema
+      prefix <- self$tablePrefix
+
+      checks <- c()
+
+      # connect to resultsConnectionDetails
+      connectionHandler <- ResultModelManager::ConnectionHandler$new(
+        connectionDetails = resultsConnectionDetails
+      )
+
+      # get time to event database count
+      result <- connectionHandler$queryDb(
+        "select count(distinct database_id) as N from @schema.@prefixtime_to_event",
+        schema = schema,
+        prefix = prefix
+      )
+      checks <- rbind(checks, data.frame(table = 'time_to_event',database = '-', check = 'database count', value = result$n))
+
+      # get dechall-rechall database count
+      result <- connectionHandler$queryDb(
+        "select count(distinct database_id) as N from @schema.@prefixdechallenge_rechallenge",
+        schema = schema,
+        prefix = prefix
+      )
+      checks <- rbind(checks, data.frame(table = 'dechallenge_rechallenge',database = '-', check = 'database count', value = result$n))
+
+      # get covariates database count
+      result <- connectionHandler$queryDb(
+        "select count(distinct database_id) as N from @schema.@prefixcohort_counts",
+        schema = schema,
+        prefix = prefix
+      )
+      checks <- rbind(checks, data.frame(table = 'covariates',database = '-', check = 'database count', value = result$n))
+
+      # check target and outcomes cohorts per database
+      result <- connectionHandler$queryDb(
+        'select database_id,
+         count(distinct target_cohort_definition_id) as t_n,
+         count(distinct outcome_cohort_definition_id) as o_n
+
+         from @schema.@prefixtime_to_event
+         group by database_id;',
+        schema = schema,
+        prefix = prefix)
+      checks <- rbind(checks, data.frame(table = 'time_to_event',database = result$databaseId, check = 'target count', value = result$tN))
+      checks <- rbind(checks, data.frame(table = 'time_to_event',database = result$databaseId, check = 'outcome count', value = result$oN))
+
+      # check target and outcomes cohorts per database
+      result <- connectionHandler$queryDb(
+        'select database_id,
+         count(distinct target_cohort_definition_id) as t_n,
+         count(distinct outcome_cohort_definition_id) as o_n
+
+         from @schema.@prefixdechallenge_rechallenge
+         group by database_id;',
+        schema = schema,
+        prefix = prefix)
+      checks <- rbind(checks, data.frame(table = 'dechallenge_rechallenge',database = result$databaseId, check = 'target count', value = result$tN))
+      checks <- rbind(checks, data.frame(table = 'dechallenge_rechallenge',database = result$databaseId, check = 'outcome count', value = result$oN))
+
+      # check target and outcomes cohorts per database
+      result <- connectionHandler$queryDb(
+        'select database_id,
+         count(distinct target_cohort_id) as t_n,
+         count(distinct outcome_cohort_id) as o_n
+         from @schema.@prefixcohort_counts
+         group by database_id;',
+        schema = schema,
+        prefix = prefix)
+      checks <- rbind(checks, data.frame(table = 'covariates',database = result$databaseId, check = 'target count', value = result$tN))
+      checks <- rbind(checks, data.frame(table = 'covariates',database = result$databaseId, check = 'outcome count', value = result$oN))
+
+      message('Characterization uploaded result summary:')
+      # print out the checksprint(checks)
+
+      return(checks)
+    },
+#' @description Partions the module specifications into smaller jobs
+#' @template analysisSpecifications
+#' @param specificationFolder A directory where the partitioned jsons will be saved to
+partitionModuleSpecifications = function(analysisSpecifications, specificationFolder) {
+
+  moduleVector <- unlist(lapply(analysisSpecifications$moduleSpecifications, function(ms) ms$module))
+  selfInd <- which(moduleVector == self$moduleName)
+  if(sum(selfInd) == 0){
+    message(paste0('No specification found for ',self$moduleName))
+    invisible(return(FALSE))
+  }
+  selfSpecification <- analysisSpecifications$moduleSpecifications[[selfInd]]
+
+  # save the full spec as it is because we do not need to split
+  # create base setting with just shared resources and self spec
+  baseSettings <- list(
+    sharedResources = analysisSpecifications$sharedResources,
+    moduleSpecifications = list(selfSpecification)
+  )
+
+  specHashId <- digest::digest2int(
+    x = as.character(ParallelLogger::convertSettingsToJson(selfSpecification))
+    )
+
+  # now save the fill json spec
+  if(!dir.exists(specificationFolder)){
+    dir.create(specificationFolder, recursive = T)
+  }
+
+  # save as spec_1.json - same name for each module but will be
+  # in a different folder
+  ParallelLogger::saveSettingsToJson(
+    object = baseSettings,
+    fileName = file.path(specificationFolder, paste0('spec_',specHashId,'.json'))
+  )
+
+  # TODO: could return the parititioned modelDesigns or the list of tempSettings
+  #       or a status/message
+  invisible(return(file.path(specificationFolder, paste0('spec_',specHashId,'.json'))))
+}
   ),
   private = list(
     .getResultsDataModelSpecification = function(tablePrefix = self$tablePrefix) {

diff --git a/R/Module-CohortDiagnostics.R b/R/Module-CohortDiagnostics.R
@@ -169,6 +169,46 @@ CohortDiagnosticsModule <- R6::R6Class(
       super$validateModuleSpecifications(
         moduleSpecifications = moduleSpecifications
       )
+      },
+    #' @description Paritions the module specifications into smaller jobs
+    #' @template analysisSpecifications
+    #' @param specificationFolder A directory where the partitioned jsons will be saved to
+    partitionModuleSpecifications = function(analysisSpecifications, specificationFolder) {
+
+      moduleVector <- unlist(lapply(analysisSpecifications$moduleSpecifications, function(ms) ms$module))
+      selfInd <- which(moduleVector == self$moduleName)
+      if(sum(selfInd) == 0){
+        message(paste0('No specification found for ',self$moduleName))
+        invisible(return(FALSE))
+      }
+      selfSpecification <- analysisSpecifications$moduleSpecifications[[selfInd]]
+
+      # save the full spec as it is because we do not need to split
+      # create base setting with just shared resources and self spec
+      baseSettings <- list(
+        sharedResources = analysisSpecifications$sharedResources,
+        moduleSpecifications = selfSpecification
+      )
+
+      specHashId <- digest::digest2int(
+        x = as.character(ParallelLogger::convertSettingsToJson(selfSpecification))
+      )
+
+      # now save the fill json spec
+      if(!dir.exists(specificationFolder)){
+        dir.create(specificationFolder, recursive = T)
+      }
+
+      # save as spec_1.json - same name for each module but will be
+      # in a different folder
+      ParallelLogger::saveSettingsToJson(
+        object = baseSettings,
+        fileName = file.path(specificationFolder, paste0('spec_',specHashId,'.json'))
+      )
+
+      # TODO: could return the parititioned modelDesigns or the list of tempSettings
+      #       or a status/message
+      invisible(return(file.path(specificationFolder, paste0('spec_',specHashId,'.json'))))
     }
   ),
   private = list(

diff --git a/R/Module-CohortGenerator.R b/R/Module-CohortGenerator.R
@@ -205,6 +205,47 @@ CohortGeneratorModule <- R6::R6Class(
         className = self$negativeControlOutcomeSharedResourcesClassName,
         sharedResourcesSpecifications = negativeControlOutcomeCohortSharedResourceSpecifications
       )
+    },
+    #' @description Partitions the module specifications into smaller jobs
+    #' @template analysisSpecifications
+    #' @param specificationFolder A directory where the partitioned jsons will be saved to
+    partitionModuleSpecifications = function(analysisSpecifications, specificationFolder) {
+
+      moduleVector <- unlist(lapply(analysisSpecifications$moduleSpecifications, function(ms) ms$module))
+      selfInd <- moduleVector == self$moduleName
+      if(sum(selfInd) == 0){
+        message(paste0('No specification found for ',self$moduleName))
+        invisible(return(FALSE))
+      }
+      selfSpecification <- analysisSpecifications$moduleSpecifications[selfInd]
+
+      specHashId <- digest::digest2int(
+        x = as.character(ParallelLogger::convertSettingsToJson(selfSpecification))
+      )
+
+      # save the CohortGenerator as it is because we do not need to split
+      # create base setting with just shared resources and self spec
+      baseSettings <- list(
+        sharedResources = analysisSpecifications$sharedResources,
+        moduleSpecifications = selfSpecification
+      )
+
+      # now save the cohort generator json spec
+      if(!dir.exists(specificationFolder)){
+        dir.create(specificationFolder, recursive = T)
+      }
+
+      # save as spec_1.json - same name for each module but will be
+      # in a different folder
+      ParallelLogger::saveSettingsToJson(
+        object = baseSettings,
+        fileName = file.path(specificationFolder, paste0('spec_',specHashId,'.json'))
+      )
+
+
+      # TODO: could return the parititioned modelDesigns or the list of tempSettings
+      #       or a status/message
+      invisible(return(file.path(specificationFolder, paste0('spec_',specHashId,'.json'))))
     }
   ),
   private = list(

diff --git a/R/Module-CohortIncidence.R b/R/Module-CohortIncidence.R
@@ -175,6 +175,47 @@ CohortIncidenceModule <- R6::R6Class(
       designJson <- rJava::J("org.ohdsi.analysis.cohortincidence.design.CohortIncidence")$fromJson(as.character(irDesign$asJSON()))
 
       invisible(designJson)
+    },
+    #' @description Partitions the module specifications into smaller jobs
+    #' @template analysisSpecifications
+    #' @param specificationFolder A directory where the partitioned jsons will be saved to
+    partitionModuleSpecifications = function(analysisSpecifications, specificationFolder) {
+
+      moduleVector <- unlist(lapply(analysisSpecifications$moduleSpecifications, function(ms) ms$module))
+      selfInd <- moduleVector == self$moduleName
+      if(sum(selfInd) == 0){
+        message(paste0('No specification found for ',self$moduleName))
+        invisible(return(FALSE))
+      }
+      selfSpecification <- analysisSpecifications$moduleSpecifications[selfInd]
+
+      specHashId <- digest::digest2int(
+        x = as.character(ParallelLogger::convertSettingsToJson(selfSpecification))
+      )
+
+      # save the CohortGenerator as it is because we do not need to split
+      # create base setting with just shared resources and self spec
+      baseSettings <- list(
+        sharedResources = analysisSpecifications$sharedResources,
+        moduleSpecifications = selfSpecification
+      )
+
+      # now save the cohort generator json spec
+      if(!dir.exists(specificationFolder)){
+        dir.create(specificationFolder, recursive = T)
+      }
+
+      # save as spec_1.json - same name for each module but will be
+      # in a different folder
+      ParallelLogger::saveSettingsToJson(
+        object = baseSettings,
+        fileName = file.path(specificationFolder, paste0('spec_',specHashId,'.json'))
+      )
+
+
+      # TODO: could return the parititioned modelDesigns or the list of tempSettings
+      #       or a status/message
+      invisible(return(file.path(specificationFolder, paste0('spec_',specHashId,'.json'))))
     }
   ),
   private = list(

diff --git a/R/Module-CohortMethod.R b/R/Module-CohortMethod.R
@@ -190,6 +190,69 @@ CohortMethodModule <- R6::R6Class(
       super$validateModuleSpecifications(
         moduleSpecifications = moduleSpecifications
       )
+    },
+    #' @description Partitions the module specifications into smaller jobs
+    #' @template analysisSpecifications
+    #' @param specificationFolder A directory where the partitioned jsons will be saved to
+    partitionModuleSpecifications = function(analysisSpecifications, specificationFolder) {
+
+      moduleVector <- unlist(lapply(analysisSpecifications$moduleSpecifications, function(ms) ms$module))
+      selfInd <- which(moduleVector == self$moduleName)
+      if(length(selfInd) == 0){
+        message(paste0('No specification found for ',self$moduleName))
+        invisible(return(FALSE))
+      }
+      selfSpecification <- analysisSpecifications$moduleSpecifications[[selfInd]]
+
+      # selfSpecification$settings$targetComparatorOutcomesList
+      #                            cmAnalysisList
+      #                            refitPsForEveryOutcome
+      #                            refitPsForEveryStudyPopulation
+      #                            cmDiagnosticThresholds
+
+      targetIds <- unlist(lapply(selfSpecification$settings$targetComparatorOutcomesList, function(tco) tco$targetId))
+
+      # split up selfSpecification$settings$targetComparatorOutcomesList
+
+      # for each uniqueTargetIds extract out the targetComparatorOutcomesList
+      # for the targetId
+      listOfTCO <- lapply(
+        X = unique(targetIds),
+        FUN = function(tId){
+          selfSpecification$settings$targetComparatorOutcomesList[which(tId == targetIds)]
+        })
+
+      # create base setting with just shared resources and self spec
+      baseSettings <- list(
+        sharedResources = analysisSpecifications$sharedResources,
+        moduleSpecifications = list(selfSpecification)
+      )
+
+      # now save each json spec
+      if(!dir.exists(specificationFolder)){
+        dir.create(specificationFolder, recursive = T)
+      }
+
+      fileVector <- c()
+      for(i in 1:length(listOfTCO)){
+        tempSettings <- baseSettings
+        tempSettings$moduleSpecifications[[1]]$settings$targetComparatorOutcomesList <- listOfTCO[[i]]
+
+        specHashId <- digest::digest2int(
+          x = as.character(ParallelLogger::convertSettingsToJson(tempSettings$moduleSpecifications))
+        )
+        tempFilePath <- file.path(specificationFolder, paste0('spec_',unique(targetIds)[i],'_',specHashId,'.json'))
+
+        fileVector <- c(fileVector,tempFilePath)
+        # save as spec_i.json - same name for each module but will be
+        # in a different folder
+        ParallelLogger::saveSettingsToJson(
+          object = tempSettings,
+          fileName = tempFilePath
+        )
+      }
+
+      invisible(return(fileVector))
     }
   )
 )