From 5aedf5ae41fd2f71edef911068fdc7ac9f3b412a Mon Sep 17 00:00:00 2001 From: Yamina Boukari <87201452+YaminaB@users.noreply.github.com> Date: Wed, 8 Apr 2026 12:31:51 +0000 Subject: [PATCH] Added all remaining with date of uk entry code --- .github/workflows/test_runner.yaml | 2 +- .../dataset_definition_full_study_cohort.py | 21 ++++++++++++++++ analysis/migration_coding.R | 25 ++++++++++++++++++- analysis/migration_coding_combinations.R | 4 ++- ...process_full_cohort_data_migration_types.R | 1 + project.yaml | 25 ++++++++++++++++++- 6 files changed, 74 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test_runner.yaml b/.github/workflows/test_runner.yaml index 63450c3..c8f90b4 100644 --- a/.github/workflows/test_runner.yaml +++ b/.github/workflows/test_runner.yaml @@ -12,6 +12,6 @@ jobs: name: Test the project can run, using dummy data steps: - name: Checkout - uses: actions/checkout@v5 + uses: actions/checkout@v6 - name: Test that the project is runnable uses: opensafely-core/research-action@v2 diff --git a/analysis/dataset_definition_full_study_cohort.py b/analysis/dataset_definition_full_study_cohort.py index 59484e6..86e3c1b 100644 --- a/analysis/dataset_definition_full_study_cohort.py +++ b/analysis/dataset_definition_full_study_cohort.py @@ -248,12 +248,27 @@ dataset.date_of_first_migration_code = date_of_first_migration_code +date_of_first_migration_code_withdoe = ( + clinical_events.where((clinical_events.snomedct_code.is_in(codelists.all_migrant_codes)) | (clinical_events.snomedct_code.is_in(date_of_entry_code))) + .where(clinical_events.date.is_on_or_between(patients.date_of_birth, study_end_date)) + .where((clinical_events.date.is_on_or_before(patients.date_of_death)) | (patients.date_of_death.is_null())) + .sort_by(clinical_events.date) + .first_for_patient().date) + +dataset.date_of_first_migration_code_withdoe = date_of_first_migration_code_withdoe + time_from_1st_pracreg_first_migration_code_days = (date_of_first_migration_code - 
date_of_first_practice_registration).days time_from_1st_pracreg_first_migration_code_months = (date_of_first_migration_code - date_of_first_practice_registration).months dataset.time_from_1st_pracreg_first_migration_code_days = time_from_1st_pracreg_first_migration_code_days dataset.time_from_1st_pracreg_first_migration_code_months = time_from_1st_pracreg_first_migration_code_months +time_from_1st_pracreg_first_migration_code_days_withdoe = (date_of_first_migration_code_withdoe - date_of_first_practice_registration).days +time_from_1st_pracreg_first_migration_code_months_withdoe = (date_of_first_migration_code_withdoe - date_of_first_practice_registration).months + +dataset.time_from_1st_pracreg_first_migration_code_days_withdoe = time_from_1st_pracreg_first_migration_code_days_withdoe +dataset.time_from_1st_pracreg_first_migration_code_months_withdoe = time_from_1st_pracreg_first_migration_code_months_withdoe + # time from birth to first migration code time_from_birth_first_migration_code_days = (date_of_first_migration_code - patients.date_of_birth).days @@ -262,6 +277,12 @@ dataset.time_from_birth_first_migration_code_days = time_from_birth_first_migration_code_days dataset.time_from_birth_first_migration_code_months = time_from_birth_first_migration_code_months +time_from_birth_first_migration_code_days_withdoe = (date_of_first_migration_code_withdoe - patients.date_of_birth).days +time_from_birth_first_migration_code_months_withdoe = (date_of_first_migration_code_withdoe - patients.date_of_birth).months + +dataset.time_from_birth_first_migration_code_days_withdoe = time_from_birth_first_migration_code_days_withdoe +dataset.time_from_birth_first_migration_code_months_withdoe = time_from_birth_first_migration_code_months_withdoe + dataset.configure_dummy_data(population_size=1000) show(dataset) diff --git a/analysis/migration_coding.R b/analysis/migration_coding.R index 57242ee..24a2fee 100644 --- a/analysis/migration_coding.R +++ b/analysis/migration_coding.R 
@@ -40,7 +40,10 @@ vars_to_summarise <- c( mig_vars <- c( "mig_status_2_cat", "mig_status_3_cat", - "mig_status_6_cat") + "mig_status_6_cat", + "mig_status_2_cat_withdoe", + "mig_status_3_cat_withdoe", + "mig_status_6_cat_withdoe") rounding <- function(vars) { case_when(vars == 0 ~ 0, @@ -56,10 +59,17 @@ migration_coding_summary <- cohort %>% group_by(migration_scheme, migration_status) %>% summarise( n = rounding(n()), + # excluding date of uk entry code total_migration_codes = rounding(sum(number_of_migration_codes)), median_migration_codes = median(number_of_migration_codes, na.rm = TRUE), q25_migration_codes = quantile(number_of_migration_codes, 0.25, na.rm = TRUE), q75_migration_codes = quantile(number_of_migration_codes, 0.75, na.rm = TRUE), + # including date of uk entry code + total_migration_codes_withdoe = rounding(sum(number_of_migration_codes_withdoe)), + median_migration_codes_withdoe = median(number_of_migration_codes_withdoe, na.rm = TRUE), + q25_migration_codes_withdoe = quantile(number_of_migration_codes_withdoe, 0.25, na.rm = TRUE), + q75_migration_codes_withdoe = quantile(number_of_migration_codes_withdoe, 0.75, na.rm = TRUE), + # excluding date of uk entry code median_time_from_1st_pracreg_first_migration_code_days = median(time_from_1st_pracreg_first_migration_code_days, na.rm = TRUE), q25_time_from_1st_pracreg_first_migration_code_days = quantile(time_from_1st_pracreg_first_migration_code_days, 0.25, na.rm = TRUE), q75_time_from_1st_pracreg_first_migration_code_days = quantile(time_from_1st_pracreg_first_migration_code_days, 0.75, na.rm = TRUE), @@ -72,6 +82,19 @@ migration_coding_summary <- cohort %>% median_time_from_birth_first_migration_code_months = median(time_from_birth_first_migration_code_months, na.rm = TRUE), q25_time_from_birth_first_migration_code_months = quantile(time_from_birth_first_migration_code_months, 0.25, na.rm = TRUE), q75_time_from_birth_first_migration_code_months = 
quantile(time_from_birth_first_migration_code_months, 0.75, na.rm = TRUE), + # including date of uk entry code: timings use the *_withdoe columns (first migration code when date-of-UK-entry codes are counted) + median_time_from_1st_pracreg_first_migration_code_days_withdoe = median(time_from_1st_pracreg_first_migration_code_days_withdoe, na.rm = TRUE), + q25_time_from_1st_pracreg_first_migration_code_days_withdoe = quantile(time_from_1st_pracreg_first_migration_code_days_withdoe, 0.25, na.rm = TRUE), + q75_time_from_1st_pracreg_first_migration_code_days_withdoe = quantile(time_from_1st_pracreg_first_migration_code_days_withdoe, 0.75, na.rm = TRUE), + median_time_from_1st_pracreg_first_migration_code_months_withdoe = median(time_from_1st_pracreg_first_migration_code_months_withdoe, na.rm = TRUE), + q25_time_from_1st_pracreg_first_migration_code_months_withdoe = quantile(time_from_1st_pracreg_first_migration_code_months_withdoe, 0.25, na.rm = TRUE), + q75_time_from_1st_pracreg_first_migration_code_months_withdoe = quantile(time_from_1st_pracreg_first_migration_code_months_withdoe, 0.75, na.rm = TRUE), + median_time_from_birth_first_migration_code_days_withdoe = median(time_from_birth_first_migration_code_days_withdoe, na.rm = TRUE), + q25_time_from_birth_first_migration_code_days_withdoe = quantile(time_from_birth_first_migration_code_days_withdoe, 0.25, na.rm = TRUE), + q75_time_from_birth_first_migration_code_days_withdoe = quantile(time_from_birth_first_migration_code_days_withdoe, 0.75, na.rm = TRUE), + median_time_from_birth_first_migration_code_months_withdoe = median(time_from_birth_first_migration_code_months_withdoe, na.rm = TRUE), + q25_time_from_birth_first_migration_code_months_withdoe = quantile(time_from_birth_first_migration_code_months_withdoe, 0.25, na.rm = TRUE), + q75_time_from_birth_first_migration_code_months_withdoe = quantile(time_from_birth_first_migration_code_months_withdoe, 0.75, na.rm = TRUE), .groups = "drop" ) diff --git a/analysis/migration_coding_combinations.R b/analysis/migration_coding_combinations.R index 4040fcb..a2816a8 100644 --- a/analysis/migration_coding_combinations.R +++ 
b/analysis/migration_coding_combinations.R @@ -44,7 +44,9 @@ migration_code_combinations_summary <- cohort %>% if (immig_status_excl_refugee_asylum) "immig_status_excl_refugee_asylum", if (refugee_asylum_status) "refugee_asylum_status", if (english_not_main_language) "english_not_main_language", - if (interpreter_required) "interpreter_required" + if (interpreter_required) "interpreter_required", + if (trafficking) "trafficking", + if (date_of_uk_entry) "date_of_uk_entry" ), collapse = "; " ), diff --git a/analysis/process_full_cohort_data_migration_types.R b/analysis/process_full_cohort_data_migration_types.R index b475d33..78e90f2 100644 --- a/analysis/process_full_cohort_data_migration_types.R +++ b/analysis/process_full_cohort_data_migration_types.R @@ -32,6 +32,7 @@ cohort <- read_feather(cohort_file) %>% migration_type_vars <- c("any_migrant", "not_born_in_uk", + "date_of_uk_entry", "immig_status_excl_refugee_asylum", "refugee_asylum_status", "english_not_main_language", diff --git a/project.yaml b/project.yaml index 800a504..860c464 100644 --- a/project.yaml +++ b/project.yaml @@ -55,6 +55,30 @@ actions: moderately_sensitive: csv: output/tables/demographics_full_study_cohort_6cat.csv + generate_demographics_full_study_table_2cat_withdoe: + run: r:latest analysis/process_full_cohort_data.R output/tables/demographics_full_study_cohort_2cat_withdoe.csv mig_status_2_cat_withdoe + needs: + - generate_full_study_cohort + outputs: + moderately_sensitive: + csv: output/tables/demographics_full_study_cohort_2cat_withdoe.csv + + generate_demographics_full_study_table_3cat_withdoe: + run: r:latest analysis/process_full_cohort_data.R output/tables/demographics_full_study_cohort_3cat_withdoe.csv mig_status_3_cat_withdoe + needs: + - generate_full_study_cohort + outputs: + moderately_sensitive: + csv: output/tables/demographics_full_study_cohort_3cat_withdoe.csv + + generate_demographics_full_study_table_6cat_withdoe: + run: r:latest analysis/process_full_cohort_data.R 
output/tables/demographics_full_study_cohort_6cat_withdoe.csv mig_status_6_cat_withdoe + needs: + - generate_full_study_cohort + outputs: + moderately_sensitive: + csv: output/tables/demographics_full_study_cohort_6cat_withdoe.csv + generate_demographics_full_study_table_migration_types: run: r:latest analysis/process_full_cohort_data_migration_types.R needs: @@ -143,7 +167,6 @@ actions: moderately_sensitive: csv: output/tables/demographics_census_2011_cohort_6cat_withdoe.csv - generate_demographics_census_2021_study_table_2cat_withdoe: run: r:latest analysis/process_census_cohort_data.R output/cohorts/census_2021_study_cohort.arrow output/tables/demographics_census_2021_cohort_2cat_withdoe.csv mig_status_2_cat_withdoe needs: