From 54f61fcfe6e52a6fac122aca863a054658115018 Mon Sep 17 00:00:00 2001
From: Yamina Boukari <87201452+YaminaB@users.noreply.github.com>
Date: Thu, 2 Apr 2026 12:55:50 +0000
Subject: [PATCH 1/2] amended denominator criteria for annual counts

---
Reviewer notes (text in this section is ignored by `git am`):

  * The six-way OR used for was_registered_at_any_point_during_interval
    has been replaced by a single interval-overlap test. The original
    never bounded start_date by INTERVAL.end_date in its "start after the
    interval start" cases, so a registration beginning after the interval
    had finished still placed the patient in that interval's denominator.
    It also missed registrations that start exactly on INTERVAL.start_date
    and end inside the interval.
  * As in the original, a registration whose end_date equals
    INTERVAL.start_date is NOT counted. Switch is_after to is_on_or_after
    if such patients should be included.
  * Hunk headers and the diffstat were recomputed for the shorter change.
    The blob ids on the "index" line are carried over from the original
    patch and no longer describe the post-image.
  * Indentation and the position of blank context lines were reconstructed
    from the hunk line counts; check them against the working tree before
    applying.

 analysis/utilities.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/analysis/utilities.py b/analysis/utilities.py
index 772effa..496f5cd 100644
--- a/analysis/utilities.py
+++ b/analysis/utilities.py
@@ -14,10 +14,16 @@ def build_common_vars(INTERVAL):
     # -------------------
 
     was_alive_on_1Jan = patients.is_alive_on(INTERVAL.start_date)
-    was_registered_on1Jan = (
-        practice_registrations.for_patient_on(INTERVAL.start_date)
-        .exists_for_patient()
-    )
+    # A registration overlaps the interval when it starts on or before the
+    # interval's last day and is either still open (no end date) or ends
+    # after the interval's first day.
+    was_registered_at_any_point_during_interval = practice_registrations.where(
+        practice_registrations.start_date.is_on_or_before(INTERVAL.end_date)
+        & (
+            practice_registrations.end_date.is_null()
+            | practice_registrations.end_date.is_after(INTERVAL.start_date)
+        )
+    ).exists_for_patient()
 
     has_recorded_sex = patients.sex.is_in(["male", "female"])
 
@@ -28,7 +34,7 @@ def build_common_vars(INTERVAL):
 
     denominator = (
         was_alive_on_1Jan
-        & was_registered_on1Jan
+        & was_registered_at_any_point_during_interval
         & has_recorded_sex
         & has_possible_age
     )

From 8eae6ef5c838e9b0f994f8bc71842702f10757ed Mon Sep 17 00:00:00 2001
From: Yamina Boukari <87201452+YaminaB@users.noreply.github.com>
Date: Thu, 2 Apr 2026 14:03:16 +0000
Subject: [PATCH 2/2] fixed overall row in date of uk entry cohort file

---
 analysis/process_date_of_uk_entry_cohort.R | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/analysis/process_date_of_uk_entry_cohort.R b/analysis/process_date_of_uk_entry_cohort.R
index 33f7726..cfa8c60 100644
--- a/analysis/process_date_of_uk_entry_cohort.R
+++ b/analysis/process_date_of_uk_entry_cohort.R
@@ -25,7 +25,6 @@ output_file <- "output/tables/demographics_date_of_uk_entry_cohort.csv"
 args <- commandArgs(trailingOnly=TRUE)
 print(commandArgs(trailingOnly=TRUE))
 
-
 # Import data ----
 cohort <- read_feather(cohort_file) %>%
   mutate(
@@ -45,7 +44,6 @@ vars_to_summarise <- c(
   "imd_quintile"
 )
 
-
 rounding <- function(vars) {
   case_when(vars == 0 ~ 0,
             vars > 7 ~ round(vars / 5) * 5)
@@ -54,7 +52,7 @@ rounding <- function(vars) {
 table_freq_overall <- cohort %>%
   group_by(any_migrant) %>%
   summarise(
-    n = rounding(nrow(cohort)),
+    n = rounding(n()),
     percentage = 100) %>%
   mutate(
     subgroup = "All",