Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ export(createResultsDataModelSettings)
export(createResultsExecutionSettings)
export(execute)
export(getCdmDatabaseMetaData)
export(partitionModule)
export(uploadResults)
export(zipResults)
import(CohortGenerator)
Expand Down
125 changes: 124 additions & 1 deletion R/Module-Characterization.R
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,130 @@ CharacterizationModule <- R6::R6Class(
)
)
return(specifications)
}
},
#' @description Summarize the uploaded results for the module
#' @template resultsConnectionDetails
#' @template resultsDataModelSettings
summarizeResults = function(resultsConnectionDetails, resultsDataModelSettings) {
# initialize checks

schema <- resultsDataModelSettings$resultsDatabaseSchema
prefix <- self$tablePrefix

checks <- c()

# connect to resultsConnectionDetails
connectionHandler <- ResultModelManager::ConnectionHandler$new(
connectionDetails = resultsConnectionDetails
)

# get time to event database count
result <- connectionHandler$queryDb(
"select count(distinct database_id) as N from @schema.@prefixtime_to_event",
schema = schema,
prefix = prefix
)
checks <- rbind(checks, data.frame(table = 'time_to_event',database = '-', check = 'database count', value = result$n))

# get dechall-rechall database count
result <- connectionHandler$queryDb(
"select count(distinct database_id) as N from @schema.@prefixdechallenge_rechallenge",
schema = schema,
prefix = prefix
)
checks <- rbind(checks, data.frame(table = 'dechallenge_rechallenge',database = '-', check = 'database count', value = result$n))

# get covariates database count
result <- connectionHandler$queryDb(
"select count(distinct database_id) as N from @schema.@prefixcohort_counts",
schema = schema,
prefix = prefix
)
checks <- rbind(checks, data.frame(table = 'covariates',database = '-', check = 'database count', value = result$n))

# check target and outcomes cohorts per database
result <- connectionHandler$queryDb(
'select database_id,
count(distinct target_cohort_definition_id) as t_n,
count(distinct outcome_cohort_definition_id) as o_n

from @schema.@prefixtime_to_event
group by database_id;',
schema = schema,
prefix = prefix)
checks <- rbind(checks, data.frame(table = 'time_to_event',database = result$databaseId, check = 'target count', value = result$tN))
checks <- rbind(checks, data.frame(table = 'time_to_event',database = result$databaseId, check = 'outcome count', value = result$oN))

# check target and outcomes cohorts per database
result <- connectionHandler$queryDb(
'select database_id,
count(distinct target_cohort_definition_id) as t_n,
count(distinct outcome_cohort_definition_id) as o_n

from @schema.@prefixdechallenge_rechallenge
group by database_id;',
schema = schema,
prefix = prefix)
checks <- rbind(checks, data.frame(table = 'dechallenge_rechallenge',database = result$databaseId, check = 'target count', value = result$tN))
checks <- rbind(checks, data.frame(table = 'dechallenge_rechallenge',database = result$databaseId, check = 'outcome count', value = result$oN))

# check target and outcomes cohorts per database
result <- connectionHandler$queryDb(
'select database_id,
count(distinct target_cohort_id) as t_n,
count(distinct outcome_cohort_id) as o_n
from @schema.@prefixcohort_counts
group by database_id;',
schema = schema,
prefix = prefix)
checks <- rbind(checks, data.frame(table = 'covariates',database = result$databaseId, check = 'target count', value = result$tN))
checks <- rbind(checks, data.frame(table = 'covariates',database = result$databaseId, check = 'outcome count', value = result$oN))

message('Characterization uploaded result summary:')
# print out the checksprint(checks)

return(checks)
},
#' @description Partions the module specifications into smaller jobs
#' @template analysisSpecifications
#' @param specificationFolder A directory where the partitioned jsons will be saved to
partitionModuleSpecifications = function(analysisSpecifications, specificationFolder) {

moduleVector <- unlist(lapply(analysisSpecifications$moduleSpecifications, function(ms) ms$module))
selfInd <- which(moduleVector == self$moduleName)
if(sum(selfInd) == 0){
message(paste0('No specification found for ',self$moduleName))
invisible(return(FALSE))
}
selfSpecification <- analysisSpecifications$moduleSpecifications[[selfInd]]

# save the full spec as it is because we do not need to split
# create base setting with just shared resources and self spec
baseSettings <- list(
sharedResources = analysisSpecifications$sharedResources,
moduleSpecifications = list(selfSpecification)
)

specHashId <- digest::digest2int(
x = as.character(ParallelLogger::convertSettingsToJson(selfSpecification))
)

# now save the fill json spec
if(!dir.exists(specificationFolder)){
dir.create(specificationFolder, recursive = T)
}

# save as spec_1.json - same name for each module but will be
# in a different folder
ParallelLogger::saveSettingsToJson(
object = baseSettings,
fileName = file.path(specificationFolder, paste0('spec_',specHashId,'.json'))
)

# TODO: could return the parititioned modelDesigns or the list of tempSettings
# or a status/message
invisible(return(file.path(specificationFolder, paste0('spec_',specHashId,'.json'))))
}
),
private = list(
.getResultsDataModelSpecification = function(tablePrefix = self$tablePrefix) {
Expand Down
40 changes: 40 additions & 0 deletions R/Module-CohortDiagnostics.R
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,46 @@ CohortDiagnosticsModule <- R6::R6Class(
super$validateModuleSpecifications(
moduleSpecifications = moduleSpecifications
)
},
#' @description Paritions the module specifications into smaller jobs
#' @template analysisSpecifications
#' @param specificationFolder A directory where the partitioned jsons will be saved to
partitionModuleSpecifications = function(analysisSpecifications, specificationFolder) {

moduleVector <- unlist(lapply(analysisSpecifications$moduleSpecifications, function(ms) ms$module))
selfInd <- which(moduleVector == self$moduleName)
if(sum(selfInd) == 0){
message(paste0('No specification found for ',self$moduleName))
invisible(return(FALSE))
}
selfSpecification <- analysisSpecifications$moduleSpecifications[[selfInd]]

# save the full spec as it is because we do not need to split
# create base setting with just shared resources and self spec
baseSettings <- list(
sharedResources = analysisSpecifications$sharedResources,
moduleSpecifications = selfSpecification
)

specHashId <- digest::digest2int(
x = as.character(ParallelLogger::convertSettingsToJson(selfSpecification))
)

# now save the fill json spec
if(!dir.exists(specificationFolder)){
dir.create(specificationFolder, recursive = T)
}

# save as spec_1.json - same name for each module but will be
# in a different folder
ParallelLogger::saveSettingsToJson(
object = baseSettings,
fileName = file.path(specificationFolder, paste0('spec_',specHashId,'.json'))
)

# TODO: could return the parititioned modelDesigns or the list of tempSettings
# or a status/message
invisible(return(file.path(specificationFolder, paste0('spec_',specHashId,'.json'))))
}
),
private = list(
Expand Down
41 changes: 41 additions & 0 deletions R/Module-CohortGenerator.R
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,47 @@ CohortGeneratorModule <- R6::R6Class(
className = self$negativeControlOutcomeSharedResourcesClassName,
sharedResourcesSpecifications = negativeControlOutcomeCohortSharedResourceSpecifications
)
},
#' @description Partitions the module specifications into smaller jobs
#' @template analysisSpecifications
#' @param specificationFolder A directory where the partitioned jsons will be saved to
partitionModuleSpecifications = function(analysisSpecifications, specificationFolder) {

moduleVector <- unlist(lapply(analysisSpecifications$moduleSpecifications, function(ms) ms$module))
selfInd <- moduleVector == self$moduleName
if(sum(selfInd) == 0){
message(paste0('No specification found for ',self$moduleName))
invisible(return(FALSE))
}
selfSpecification <- analysisSpecifications$moduleSpecifications[selfInd]

specHashId <- digest::digest2int(
x = as.character(ParallelLogger::convertSettingsToJson(selfSpecification))
)

# save the CohortGenerator as it is because we do not need to split
# create base setting with just shared resources and self spec
baseSettings <- list(
sharedResources = analysisSpecifications$sharedResources,
moduleSpecifications = selfSpecification
)

# now save the cohort generator json spec
if(!dir.exists(specificationFolder)){
dir.create(specificationFolder, recursive = T)
}

# save as spec_1.json - same name for each module but will be
# in a different folder
ParallelLogger::saveSettingsToJson(
object = baseSettings,
fileName = file.path(specificationFolder, paste0('spec_',specHashId,'.json'))
)


# TODO: could return the parititioned modelDesigns or the list of tempSettings
# or a status/message
invisible(return(file.path(specificationFolder, paste0('spec_',specHashId,'.json'))))
}
),
private = list(
Expand Down
41 changes: 41 additions & 0 deletions R/Module-CohortIncidence.R
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,47 @@ CohortIncidenceModule <- R6::R6Class(
designJson <- rJava::J("org.ohdsi.analysis.cohortincidence.design.CohortIncidence")$fromJson(as.character(irDesign$asJSON()))

invisible(designJson)
},
#' @description Partitions the module specifications into smaller jobs
#' @template analysisSpecifications
#' @param specificationFolder A directory where the partitioned jsons will be saved to
partitionModuleSpecifications = function(analysisSpecifications, specificationFolder) {

moduleVector <- unlist(lapply(analysisSpecifications$moduleSpecifications, function(ms) ms$module))
selfInd <- moduleVector == self$moduleName
if(sum(selfInd) == 0){
message(paste0('No specification found for ',self$moduleName))
invisible(return(FALSE))
}
selfSpecification <- analysisSpecifications$moduleSpecifications[selfInd]

specHashId <- digest::digest2int(
x = as.character(ParallelLogger::convertSettingsToJson(selfSpecification))
)

# save the CohortGenerator as it is because we do not need to split
# create base setting with just shared resources and self spec
baseSettings <- list(
sharedResources = analysisSpecifications$sharedResources,
moduleSpecifications = selfSpecification
)

# now save the cohort generator json spec
if(!dir.exists(specificationFolder)){
dir.create(specificationFolder, recursive = T)
}

# save as spec_1.json - same name for each module but will be
# in a different folder
ParallelLogger::saveSettingsToJson(
object = baseSettings,
fileName = file.path(specificationFolder, paste0('spec_',specHashId,'.json'))
)


# TODO: could return the parititioned modelDesigns or the list of tempSettings
# or a status/message
invisible(return(file.path(specificationFolder, paste0('spec_',specHashId,'.json'))))
}
),
private = list(
Expand Down
63 changes: 63 additions & 0 deletions R/Module-CohortMethod.R
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,69 @@ CohortMethodModule <- R6::R6Class(
super$validateModuleSpecifications(
moduleSpecifications = moduleSpecifications
)
},
#' @description Partitions the module specifications into smaller jobs
#' @template analysisSpecifications
#' @param specificationFolder A directory where the partitioned jsons will be saved to
partitionModuleSpecifications = function(analysisSpecifications, specificationFolder) {

moduleVector <- unlist(lapply(analysisSpecifications$moduleSpecifications, function(ms) ms$module))
selfInd <- which(moduleVector == self$moduleName)
if(length(selfInd) == 0){
message(paste0('No specification found for ',self$moduleName))
invisible(return(FALSE))
}
selfSpecification <- analysisSpecifications$moduleSpecifications[[selfInd]]

# selfSpecification$settings$targetComparatorOutcomesList
# cmAnalysisList
# refitPsForEveryOutcome
# refitPsForEveryStudyPopulation
# cmDiagnosticThresholds

targetIds <- unlist(lapply(selfSpecification$settings$targetComparatorOutcomesList, function(tco) tco$targetId))

# split up selfSpecification$settings$targetComparatorOutcomesList

# for each uniqueTargetIds extract out the targetComparatorOutcomesList
# for the targetId
listOfTCO <- lapply(
X = unique(targetIds),
FUN = function(tId){
selfSpecification$settings$targetComparatorOutcomesList[which(tId == targetIds)]
})

# create base setting with just shared resources and self spec
baseSettings <- list(
sharedResources = analysisSpecifications$sharedResources,
moduleSpecifications = list(selfSpecification)
)

# now save each json spec
if(!dir.exists(specificationFolder)){
dir.create(specificationFolder, recursive = T)
}

fileVector <- c()
for(i in 1:length(listOfTCO)){
tempSettings <- baseSettings
tempSettings$moduleSpecifications[[1]]$settings$targetComparatorOutcomesList <- listOfTCO[[i]]

specHashId <- digest::digest2int(
x = as.character(ParallelLogger::convertSettingsToJson(tempSettings$moduleSpecifications))
)
tempFilePath <- file.path(specificationFolder, paste0('spec_',unique(targetIds)[i],'_',specHashId,'.json'))

fileVector <- c(fileVector,tempFilePath)
# save as spec_i.json - same name for each module but will be
# in a different folder
ParallelLogger::saveSettingsToJson(
object = tempSettings,
fileName = tempFilePath
)
}

invisible(return(fileVector))
}
)
)
Loading
Loading