From a9238a18f563bf49d2604d0b7daf34495a48d164 Mon Sep 17 00:00:00 2001 From: Martina Pesce <82039235+marrpesce@users.noreply.github.com> Date: Tue, 23 Dec 2025 14:59:50 +0000 Subject: [PATCH 1/5] Add coded death --- analysis/Table_DoD.R | 39 ++++++++++++++++++++++++++++++++-- analysis/dataset_definition.py | 18 ++++++++++++++++ codelists/codelists.json | 6 ++++++ codelists/codelists.txt | 5 ++++- 4 files changed, 65 insertions(+), 3 deletions(-) diff --git a/analysis/Table_DoD.R b/analysis/Table_DoD.R index de30d27..d03a4aa 100644 --- a/analysis/Table_DoD.R +++ b/analysis/Table_DoD.R @@ -26,7 +26,8 @@ dataset0 <- read_csv("output/dataset_death_date_diff.csv.gz") %>% ethnicity = as.factor(ethnicity), sex = as.factor(sex), last_registration_start_date = as.Date(last_registration_start_date), - last_registration_end_date = as.Date(last_registration_end_date) + last_registration_end_date = as.Date(last_registration_end_date), + death_coded_date = as.Date(death_coded_date) ) %>% filter( has_registration == TRUE & # was registered at the beginning of the year the person died @@ -40,6 +41,7 @@ rounding <- function(vars) { vars > 7 ~ round(vars / 5) * 5) } + # ----------------------- # Create variables DoD_diff_dataset <- dataset0 %>% @@ -56,6 +58,8 @@ DoD_diff_dataset <- dataset0 %>% diff_DoD = TPP_death_date - ons_death_date, TPP_death = case_when(!is.na(TPP_death_date) ~ "yes", TRUE ~ NA_character_), + TPP_date_code_death = case_when(!is.na(TPP_death_date) | !is.na(death_coded_date) ~ "yes", + TRUE ~ NA_character_), ONS_death = case_when(!is.na(ons_death_date) ~ "yes", TRUE ~ NA_character_), rural_urb_recode = case_when( @@ -64,6 +68,11 @@ DoD_diff_dataset <- dataset0 %>% TRUE ~ NA_character_ ), year_pref_ONS = if_else(!is.na(ons_death_date), year(ons_death_date), year(TPP_death_date)), + year_pref_ONS_TPP_plus_codes = case_when( + !is.na(ons_death_date) ~ year(ons_death_date), + !is.na(TPP_death_date) ~ year(TPP_death_date), + !is.na(death_coded_date) ~ 
year(death_coded_date) + ), year_month_pref_ONS = if_else(!is.na(ons_death_date), format(ons_death_date, "%Y-%m"), format(TPP_death_date, "%Y-%m") ) ) %>% @@ -74,6 +83,12 @@ DoD_diff_dataset <- dataset0 %>% !is.na(TPP_death) & is.na(ONS_death) ~ "TPP", is.na(TPP_death) & is.na(ONS_death) ~ NA_character_ ), + ONS_or_TPP_date_or_codes = case_when( + !is.na(ONS_death) & !is.na(TPP_date_code_death) ~ "ONS & TPP", + !is.na(ONS_death) & is.na(TPP_date_code_death) ~ "ONS", + !is.na(TPP_date_code_death) & is.na(ONS_death) ~ "TPP", + is.na(TPP_date_code_death) & is.na(ONS_death) ~ NA_character_ + ), DoD_groups = case_when( diff_DoD == 0 ~ "0", @@ -207,6 +222,22 @@ table_source_general <- DoD_diff_dataset %>% select(year_pref_ONS, ONS_or_TPP, count, group_var, group_value) +# % by source including TPP coded deaths +table_source_general_plus_codes <- DoD_diff_dataset %>% + group_by(year_pref_ONS_TPP_plus_codes) %>% + mutate(total = rounding(n())) %>% + group_by(year_pref_ONS_TPP_plus_codes, ONS_or_TPP_date_or_codes, total) %>% + summarise( + count = rounding(n()), + .groups = "drop" + ) %>% + mutate( + group_var = "general population plus codes", + group_value = "general population plus codes" + ) %>% + select(year_pref_ONS_TPP_plus_codes, ONS_or_TPP_date_or_codes, count, group_var, group_value) + +#source by subgroup table_source_by_subgroup <- function(data, group_var) { group_var_name <- deparse(substitute(group_var)) @@ -237,7 +268,8 @@ table_source_sex <- table_source_by_subgroup(DoD_diff_dataset, sex) collate_death_source_table <- bind_rows( - table_source_general, + table_source_general, + table_source_general_plus_codes, table_source_age, table_source_ethnicity, table_source_region, @@ -250,6 +282,9 @@ collate_death_source_table <- bind_rows( write.csv(collate_death_source_table, here::here("output", "report", "table_DoD", "collate_death_source_table.csv")) + + + # % by source 2020-2024 table_source_general_20_24 <- DoD_diff_dataset %>% filter(year_pref_ONS > 2019 & 
year_pref_ONS < 2025) %>% diff --git a/analysis/dataset_definition.py b/analysis/dataset_definition.py index 0edee44..1b321fb 100644 --- a/analysis/dataset_definition.py +++ b/analysis/dataset_definition.py @@ -180,3 +180,21 @@ ## Include people registered with a TPP practice dataset.has_registration = practice_registrations.for_patient_on(year_start_DoD).exists_for_patient() | ((patients.date_of_birth.year == year_start_DoD.year) & practice_registrations.for_patient_on(earliest_DoD).exists_for_patient()) + + +# Coded date of death +death_coded = codelist_from_csv( + "codelists/nhsd-primary-care-domain-refsets-death_cod.csv", + column="code") + +dataset.death_coded_date = clinical_events.where( + clinical_events.snomedct_code.is_in(death_coded) + ).sort_by(clinical_events.date).last_for_patient().date + +# Dummy data configuration +dataset.configure_dummy_data(population_size=10000, timeout=180, + additional_population_constraint=( + dataset.TPP_death_date.is_on_or_between("2020-01-01","2020-05-01") & + dataset.death_coded_date.is_on_or_between("2020-01-01","2020-05-01") + ) + ) \ No newline at end of file diff --git a/codelists/codelists.json b/codelists/codelists.json index efe7700..f8da588 100644 --- a/codelists/codelists.json +++ b/codelists/codelists.json @@ -5,6 +5,12 @@ "url": "https://www.opencodelists.org/codelist/opensafely/ethnicity-snomed-0removed/22911876/", "downloaded_at": "2025-04-29 14:01:55.771409Z", "sha": "d95a2f826299563aa57f6fa99009e6e13a65151b" + }, + "nhsd-primary-care-domain-refsets-death_cod.csv": { + "id": "nhsd-primary-care-domain-refsets/death_cod/20250912", + "url": "https://www.opencodelists.org/codelist/nhsd-primary-care-domain-refsets/death_cod/20250912/", + "downloaded_at": "2025-12-23 13:03:26.486460Z", + "sha": "3cb7b2670df14b3ff5f42c65ad31d1f11599f6fc" } } } \ No newline at end of file diff --git a/codelists/codelists.txt b/codelists/codelists.txt index 57af32a..4ce87fe 100644 --- a/codelists/codelists.txt +++ 
b/codelists/codelists.txt @@ -1,2 +1,5 @@ # Ethnicity -opensafely/ethnicity-snomed-0removed/22911876 \ No newline at end of file +opensafely/ethnicity-snomed-0removed/22911876 + +# death codes +nhsd-primary-care-domain-refsets/death_cod/20250912/ \ No newline at end of file From 67aea7821b8429cc7f0117dd136607f43fcf123e Mon Sep 17 00:00:00 2001 From: Martina Pesce <82039235+marrpesce@users.noreply.github.com> Date: Tue, 23 Dec 2025 15:00:16 +0000 Subject: [PATCH 2/5] death codelist --- ...-primary-care-domain-refsets-death_cod.csv | 219 ++++++++++++++++++ 1 file changed, 219 insertions(+) create mode 100644 codelists/nhsd-primary-care-domain-refsets-death_cod.csv diff --git a/codelists/nhsd-primary-care-domain-refsets-death_cod.csv b/codelists/nhsd-primary-care-domain-refsets-death_cod.csv new file mode 100644 index 0000000..721b116 --- /dev/null +++ b/codelists/nhsd-primary-care-domain-refsets-death_cod.csv @@ -0,0 +1,219 @@ +code,term +11545006,"Emergency room admission, dead on arrival" +12141009,"Determination of outcome, death avoidable" +13234002,Death during anesthetic induction +1325731000000106,Died in residential home +13603008,"Determination of outcome, death avoidable, error in technique" +13831000000107,Cremation form Part B +13841000000103,Cremation form Part C +14035003,Death by hanging +15355001,Unattended death +15654006,Drowning in liquid other than water +162852002,On examination - dead - unexpected +162853007,On examination - dead - expected +162854001,On examination - dead - unattended death +162855000,On examination - dead - sudden death +162857008,On examination - dead - suspicious death +16541000000108,SD17/18 received - death clarification +16551000000106,"SD17/18 no details, returned" +16561000000109,Cremation form part C arranged +16983000,Death in hospital +17041000000103,FP22 - death +17111000000103,Death certificate Med A +17441000000106,Death certificate - form 11 +17766007,"Fetal death from asphyxia AND/OR anoxia, not clear if noted 
before OR after onset of labor" +183676005,Died in hospital +184084003,Registration ghost - deceased +184274007,Hospital death discharge notification +184275008,Death notification from hospital +184276009,Awaiting hospital death discharge letter +184277000,Received hospital death discharge letter +184278005,Asked for hospital death discharge letter +184280004,Death notification - non-hospital source +184297005,Patient died in hospital +184300000,Found dead at accident site +184305005,Cause of death +184307002,Postmortem report received +18521000000102,Cause of death clarification status SD17/18 +18632008,"Patient status determination, deceased" +18893004,Non-traffic vehicular accidental death +1917008,"Patient discharge, deceased, medicolegal case" +200155004,Death from any obstetric cause occurring more than 42 days but less than one year after delivery +200156003,Death from sequela of direct obstetric cause +22561000000103,"Unexpected.death, Procurator Fiscal told" +230802007,Brainstem death +23331000000109,SD17 form - cause of death clarification +23341000000100,SD18 form - cause of death clarification +23546003,Suicide while incarcerated +237358009,Direct maternal death +237360006,Late maternal death +242014006,Dry drowning +25318002,Death by strangulation +26636000,Sudden death +26878008,Death due to trampling +268923008,On examination - dead +270102001,Report for Coroner +270103006,Coroner report - requested +270104000,Coroner report - sent off +270105004,Coroner report - paid for +270107007,Coroner's postmortem report +270108002,Coroner's postmortem report awaited +270109005,Coroner's postmortem report requested +270110000,Coroner's postmortem report received +270113003,Cremation certification +270115005,Unexpected death-Coroner told +270116006,Postmortem report +270607006,Police surgeon's postmortem report awaited +270608001,Police surgeon's postmortem report requested +270609009,Police surgeon's postmortem report received +2711000000101,Cause of death 
clarification SD17/18 +274228002,Suicide by drug overdose +274639009,"Death occurring less than 24 hours from onset of symptoms, not otherwise expected" +274644002,"Sudden death, cause unknown" +275656009,"Stat B,C and F cremation certs" +27792001,Intrapartum maternal death +28631002,Euthanasia +287188006,Suicide using domestic gas +287189003,Suicide using car exhaust +287190007,Suicide by hanging +287191006,Suicide by suffocation +287192004,Suicide by drowning +287193009,Suicide using firearm +287194003,Suicide by cutting or stabbing +287195002,Suicide by jumping from high place +288260005,Maternal death - perinatal +300990005,Found dead in bed +305398007,Admission to the mortuary +306681000000104,Underlying cause of death +307928008,Death administrative procedure +307930005,Death certificate +308375000,Report for Procurator-Fiscal +308376004,Police surgeon postmortem report +308468002,Referral to coroner +308646001,Death certification +308647005,Cremation certification status +308806009,Coroner's report status +308808005,Postmortem report status +308809002,Coroner's postmortem report status +308810007,Police surgeon's postmortem report status +310334003,Cremation certificate +3133002,"Patient discharge, deceased, autopsy" +31868001,Antepartum maternal death +34468006,Sudden death of unknown cause during the puerperium +3581000000106,"Form 89 - patient died in medical care, patients family notified" +363049002,Death notification +366044004,Finding of place of death +36882002,Death due to assault AND battery +370921009,Patient death associated with a fall while being cared for in a healthcare facility +371000000100,SD17/18 completed +371828006,Patient deceased during stay (discharge status = dead) +373866000,Death due to chemotherapy toxicity +373867009,Death due to radiotherapy toxicity +37522003,"Determination of outcome, death unavoidable" +381000000103,Cremation form part B completed +38605008,Natural death +391000000101,Cremation form part C completed 
+39399006,Natural death with probable cause suspected +397709008,Patient died +398226000,Perioperative death +399753006,Date of death +401321000,Hospital notified of death +405535005,Adverse incident resulting in death +40947009,Drowning +415169009,Preoperative anesthetic death +41819004,"Patient discharge, deceased, donation of body" +418309003,Suspicious death +418362005,Dead - sudden death +418646009,Dead - expected +419099009,Dead +419393000,Dead - suspicious death +419620001,Death +419697005,Dead - unexpected +419973004,Found dead +423409001,Eastern Cooperative Oncology Group performance status - grade 5 +426999008,Cardiopulmonary resuscitation discontinued due to signs of death +428413005,Death verification +441760003,Unattended death of unknown cause +44301001,Suicide +44633000,Accidental death in public place +45216005,Iatrogenic death +49380000,Anesthetic death +49713007,Unexpected death +50105002,"Undetermined manner of death, natural causes suspected" +50109008,Drowning in fresh water +50181009,"Determination of outcome, death avoidable, chart audit required" +50514002,Instantaneous death +5062002,Death by immolation +51709005,Assisted suicide +52136000,Intraoperative death +5331006,Postoperative death +53559009,Death in less than 24 hours from onset of symptoms +5866003,Postpartum maternal death +59283008,Maternal death +62691006,"Determination of outcome, death avoidable, error in judgement" +63238001,Dead on arrival at hospital +6476001,Death by electrocution +65037004,"Death, manner undetermined" +65819004,Accidental death in home +66360006,Natural death with proved cause +66411002,"Undetermined manner of death, homicide suspected" +67786008,Death by fire +68023007,"Undetermined manner of death, suicide suspected" +698714004,Indirect maternal death +698715003,Death from sequela of indirect maternal cause +70055007,Unexpected sudden death of adult +702710003,Dead - death without witness +711157000,Sudden infant death with dysgenesis of testes syndrome 
+713049009,Died in community hospital +713050009,Died in learning disability unit +713051008,Died in mental health unit +713461000000100,Death administration +714342001,Died in ambulance +719425009,Sudden unexpected death in epilepsy +720529005,Death from indirect obstetric cause occurring more than 42 days but less than one year after delivery +720534009,Death from direct obstetric cause occurring more than 42 days but less than one year after delivery +725951000000101,GP22 deregistration - death +735686002,Sudden cardiac death due to cardiac arrhythmia +740603007,Killed +74148000,Death unattended by physician +7420003,Drowning in brackish water +74332007,Death by asphyxiation +74376000,Drowning in salt water +74660000,Accidental death in industrial place +75004002,"Emergency room admission, died in emergency room" +766231000000101,Cremation form 4 completed +766291000000100,Cremation form 5 completed +77105004,Garrotment +78070009,Suicide by multiple means +7878000,Accidental death +78857004,Natural death with proved cause without autopsy +794191000000103,Verification of expected death +79779006,"Patient discharge, deceased, no autopsy" +79867001,Traffic vehicular accidental death +812481000000104,Dead on arrival in accident and emergency department +812491000000102,Died in accident and emergency department +81840004,Natural death reportable to medicolegal authority +818961000000101,Patient died in residential institution +818971000000108,Patient died in public place +82621005,Natural death with proved cause by autopsy +863541000000104,Sudden unexpected death in childhood rapid response team call out +87281005,Death from overwork +87309006,Death of unknown cause +876879006,Died at home +876880009,Died in care home +876881008,Died in general practice premises +876882001,Died in hospice +876883006,Died during operation +876884000,Died in nursing home +876885004,Died in street +876886003,Died in usual place of residence +891003,Suicide by self-administered drug 
+89334003,"Patient discharge, deceased, to anatomic board" +89816009,Died without sign of disease +90049009,Unexplained sudden death +929151000000109,Patient died in Part III accommodation +929161000000107,Patient died in Part IV accommodation +931421000000107,Patient died in hospice community lodge +95281009,Sudden cardiac death +9855000,Natural death with unknown cause +9857008,"Determination of outcome, death avoidable, error in diagnosis" From e9f40f8587411eae9a6edf008bcc48298859f3f2 Mon Sep 17 00:00:00 2001 From: Martina Pesce <82039235+marrpesce@users.noreply.github.com> Date: Tue, 23 Dec 2025 15:00:29 +0000 Subject: [PATCH 3/5] rounding --- analysis/practice_decile.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/analysis/practice_decile.R b/analysis/practice_decile.R index 35e1683..0deba75 100644 --- a/analysis/practice_decile.R +++ b/analysis/practice_decile.R @@ -61,7 +61,7 @@ base <- dataset_measure_practice %>% # Count contributing practices per year ---- n_by_year <- base %>% group_by(year) %>% - summarise(n_practices = n_distinct(practice), .groups = "drop") + summarise(n_practices = rounding(n_distinct(practice)), .groups = "drop") # Deciles per year df_quantiles <- base %>% @@ -108,7 +108,7 @@ ggsave(fs::path(output_dir, glue("practice_deciles_TPP_perc.png")), plot = pract pract_deaths_population <- dataset_measure_practice %>% group_by(year) %>% summarise( - practices_total = n_distinct(practice), + practices_total = rounding(n_distinct(practice)), practices_den_less_1000 = rounding(sum(is.na(global_denominator) | global_denominator < 1000)), practices_zero_deaths = rounding(sum(global_numerator == 0L, na.rm = TRUE)), practices_den_gt_1000_zero_deaths = rounding(sum((global_denominator > 1000) & (global_numerator == 0L), na.rm = TRUE)), From fd92761f550b9bf2c72aec3430906928981f12e6 Mon Sep 17 00:00:00 2001 From: Martina Pesce <82039235+marrpesce@users.noreply.github.com> Date: Tue, 27 Jan 2026 18:09:00 +0000 Subject: [PATCH 
4/5] Reorder scripts and outputs folders --- .../{summary_stats.R => 1_summary_stats.R} | 101 +++--- ...g_dereg.R => 2_tables_ONS_TPP_reg_dereg.R} | 32 +- .../3_dataset_DoD_TPP_ONS_with_grace_period.R | 129 +++++++ analysis/4_source_prop.R | 243 +++++++++++++ analysis/5_DoD_diff.R | 85 +++++ ...actice_decile.R => 6_practice_by_source.R} | 10 +- .../Final-visualizations.R | 0 analysis/Table_DoD_without_grace_period.R | 326 ----------------- .../{ => dataset_def}/dataset_definition.py | 6 +- analysis/{ => dataset_def}/measure_def.py | 0 .../{ => dataset_def}/measure_practice.py | 0 analysis/{ => old}/Table_DoD.R | 0 analysis/old/Table_DoD_without_grace_period.R | 327 ++++++++++++++++++ analysis/{ => old}/Table_measure_rate.R | 0 project.yaml | 96 ++--- 15 files changed, 900 insertions(+), 455 deletions(-) rename analysis/{summary_stats.R => 1_summary_stats.R} (58%) rename analysis/{tables_ONS_TPP_reg_dereg.R => 2_tables_ONS_TPP_reg_dereg.R} (83%) create mode 100644 analysis/3_dataset_DoD_TPP_ONS_with_grace_period.R create mode 100644 analysis/4_source_prop.R create mode 100644 analysis/5_DoD_diff.R rename analysis/{practice_decile.R => 6_practice_by_source.R} (92%) rename Final-visualizations.R => analysis/Final-visualizations.R (100%) delete mode 100644 analysis/Table_DoD_without_grace_period.R rename analysis/{ => dataset_def}/dataset_definition.py (98%) rename analysis/{ => dataset_def}/measure_def.py (100%) rename analysis/{ => dataset_def}/measure_practice.py (100%) rename analysis/{ => old}/Table_DoD.R (100%) create mode 100644 analysis/old/Table_DoD_without_grace_period.R rename analysis/{ => old}/Table_measure_rate.R (100%) diff --git a/analysis/summary_stats.R b/analysis/1_summary_stats.R similarity index 58% rename from analysis/summary_stats.R rename to analysis/1_summary_stats.R index e401e19..96f850f 100644 --- a/analysis/summary_stats.R +++ b/analysis/1_summary_stats.R @@ -9,11 +9,11 @@ library("here") library("skimr") ## Create output directory 
-output_dir <- here("output", "DoD") +output_dir <- here("output", "summary_stats") fs::dir_create(output_dir) # Import processed data ---- -DoD_diff <- read_csv("output/dataset_death_date_diff.csv.gz") %>% +dataset_death_TPP_ONS <- read_csv("output/highly_sensitive/dataset_death_TPP_ONS.csv.gz") %>% mutate( region = as.factor(region), age_band = as.factor(age_band), @@ -32,13 +32,13 @@ DoD_diff <- read_csv("output/dataset_death_date_diff.csv.gz") %>% ) ) -DoD_diff_plus_inc_crit <- DoD_diff %>% +dataset_death_TPP_ONS_plus_inc_crit <- dataset_death_TPP_ONS %>% filter( has_registration == TRUE & # was registered at the beginning of the year the person died any_death_during_study == TRUE # died between "2009-01-01" - "2025-06-06" + (deregistration date + 30 days) is after one date of death ) -DoD_diff_plus_inc_crit_0_dereg_death <- DoD_diff_plus_inc_crit %>% +dataset_death_TPP_ONS_plus_inc_crit_0_dereg_death <- dataset_death_TPP_ONS_plus_inc_crit %>% filter( death_dereg_diff >= 0) @@ -46,55 +46,56 @@ DoD_diff_plus_inc_crit_0_dereg_death <- DoD_diff_plus_inc_crit %>% # Summary ## Gral -summary_DoD_diff <- skim(DoD_diff) +summary_dataset_death_TPP_ONS <- skim(dataset_death_TPP_ONS) -summary_DoD_diff_plus_inc_crit <- skim(DoD_diff_plus_inc_crit) +summary_dataset_death_TPP_ONS_plus_inc_crit <- skim(dataset_death_TPP_ONS_plus_inc_crit) -summary_DoD_diff_plus_inc_crit_0_dereg_death <- skim(DoD_diff_plus_inc_crit_0_dereg_death) +summary_dataset_death_TPP_ONS_plus_inc_crit_0_dereg_death <- skim(dataset_death_TPP_ONS_plus_inc_crit_0_dereg_death) -write.csv(summary_DoD_diff, file = here::here("output", "DoD","summary_stats_DoD.csv"), +write.csv(summary_dataset_death_TPP_ONS, file = here::here("output", "summary_stats","summary_stats_dataset_death_TPP_ONS.csv"), row.names = FALSE) -write.csv(summary_DoD_diff_plus_inc_crit, file = here::here("output", "DoD","summary_DoD_diff_plus_inc_crit.csv"), +write.csv(summary_dataset_death_TPP_ONS_plus_inc_crit, file = here::here("output", 
"summary_stats","summary_dataset_death_TPP_ONS_plus_inc_crit.csv"), row.names = FALSE) -write.csv(summary_DoD_diff_plus_inc_crit_0_dereg_death, file = here::here("output", "DoD","summary_DoD_diff_plus_inc_crit_0_dereg_death.csv"), +write.csv(summary_dataset_death_TPP_ONS_plus_inc_crit_0_dereg_death, file = here::here("output", "summary_stats","summary_dataset_death_TPP_ONS_plus_inc_crit_0_dereg_death.csv"), row.names = FALSE) -## Cat -table_freq <- DoD_diff %>% +## Categorical variables + +table_freq <- dataset_death_TPP_ONS %>% pivot_longer(cols = c(age_band, ons_death_place:ethnicity), names_to = "subgroup", values_to = "category") %>% group_by(year(min_DoD), subgroup, category) %>% summarise( n=n() ) -table_freq_plus_inc_crit <- DoD_diff_plus_inc_crit %>% +table_freq_plus_inc_crit <- dataset_death_TPP_ONS_plus_inc_crit %>% pivot_longer(cols = c(age_band, ons_death_place:ethnicity), names_to = "subgroup", values_to = "category") %>% group_by(year(min_DoD), subgroup, category) %>% summarise( n=n() ) -table_freq_plus_inc_crit_0_dereg_death <- DoD_diff_plus_inc_crit_0_dereg_death %>% +table_freq_plus_inc_crit_0_dereg_death <- dataset_death_TPP_ONS_plus_inc_crit_0_dereg_death %>% pivot_longer(cols = c(age_band, ons_death_place:ethnicity), names_to = "subgroup", values_to = "category") %>% group_by(year(min_DoD), subgroup, category) %>% summarise( n=n() ) -write.csv(table_freq, file = here::here("output", "DoD","table_freq_DoD.csv"), +write.csv(table_freq, file = here::here("output", "summary_stats","table_freq_DoD.csv"), row.names = FALSE) -write.csv(table_freq_plus_inc_crit, file = here::here("output", "DoD","table_freq_plus_inc_crit.csv"), +write.csv(table_freq_plus_inc_crit, file = here::here("output", "summary_stats","table_freq_plus_inc_crit.csv"), row.names = FALSE) -write.csv(table_freq_plus_inc_crit_0_dereg_death, file = here::here("output", "DoD","table_freq_plus_inc_crit_0_dereg_death.csv"), +write.csv(table_freq_plus_inc_crit_0_dereg_death, file = 
here::here("output", "summary_stats","table_freq_plus_inc_crit_0_dereg_death.csv"), row.names = FALSE) -# Impossible dates of death not release -impossible_dod_month <- DoD_diff %>% +# Impossible dates of death +impossible_dod <- dataset_death_TPP_ONS %>% mutate( ons_DoD_impossible = case_when( ons_death_date < date_of_birth ~ "death_before_birth", @@ -110,7 +111,8 @@ impossible_dod_month <- DoD_diff %>% is.na(TPP_death_date) ~ "is empty", TRUE ~ "ok" ), - year_month_min_dod = format(min_DoD, "%Y-%m") + year_month_min_dod = format(min_DoD, "%Y-%m"), + year_min_dod = format(min_DoD, "%Y") ) %>% pivot_longer( cols = c(ons_DoD_impossible, TPP_DoD_impossible), @@ -123,7 +125,10 @@ impossible_dod_month <- DoD_diff %>% source == "TPP_DoD_impossible" ~ "TPP", TRUE ~ source ) - ) %>% + ) + +# by month +impossible_dod_month <- impossible_dod %>% group_by(year_month_min_dod, source, DoD_impossible) %>% summarise( n = n(), @@ -132,45 +137,25 @@ impossible_dod_month <- DoD_diff %>% write.csv( impossible_dod_month, - here::here("output", "DoD", "impossible_dod_month.csv"), + here::here("output", "summary_stats", "impossible_dod_month.csv"), + row.names = FALSE +) + + +# by year +impossible_dod_year <- impossible_dod %>% +group_by(year_min_dod, source, DoD_impossible) %>% + summarise( + n = n(), + .groups = "drop" + ) + +write.csv( + impossible_dod_year, + here::here("output", "summary_stats", "impossible_dod_year.csv"), row.names = FALSE ) -# # plots -# -# # Plot histogram faceted by data source -# ## Data prep -# DoD_diff_date_long <- DoD_diff %>% -# pivot_longer( -# cols = c(TPP_death_date, ons_death_date, min_DoD), -# names_to = "source", -# values_to = "death_date" -# ) - -# ## Plot -# DoD_histogram <- ggplot(DoD_diff_date_long, aes(x = death_date)) + -# geom_histogram() + -# facet_wrap(~ source, scales = "free_y") + -# theme_minimal() + -# labs(title = "Death Dates by Source", x = "Date", y = "Count") -# -# -# # Cat variables bar plots -# ## Data prep -# DoD_diff_cat_long 
<- DoD_diff %>% -# pivot_longer( -# cols = c(ons_death_place, region, rural_urban, age_band, IMD_q10, ethnicity), -# names_to = "variable", -# values_to = "category" -# ) -# -# # Bar plots subcat -# DoD_diff_cat_bar_plot <- ggplot(DoD_diff_cat_long, aes(x = category)) + -# geom_bar(aes(fill=year)) + -# facet_wrap(~ variable, scales = "free_x") + -# theme_minimal() + -# labs(title = "Counts by subgroup", -# x = "Category", y = "Count") + -# theme(axis.text.x = element_text(angle = 45, hjust = 1)) + diff --git a/analysis/tables_ONS_TPP_reg_dereg.R b/analysis/2_tables_ONS_TPP_reg_dereg.R similarity index 83% rename from analysis/tables_ONS_TPP_reg_dereg.R rename to analysis/2_tables_ONS_TPP_reg_dereg.R index 73242a3..0f70b89 100644 --- a/analysis/tables_ONS_TPP_reg_dereg.R +++ b/analysis/2_tables_ONS_TPP_reg_dereg.R @@ -13,7 +13,7 @@ output_dir <- here("output", "report", "reg_dereg_ONS_TPP") fs::dir_create(output_dir) # Import processed data ---- -dataset0 <- read_csv("output/dataset_death_date_diff.csv.gz") %>% +dataset_death_TPP_ONS <- read_csv("output/highly_sensitive/dataset_death_TPP_ONS.csv.gz") %>% mutate( TPP_death_date = as.Date(TPP_death_date), ons_death_date = as.Date(ons_death_date), @@ -38,7 +38,7 @@ rounding <- function(vars) { # Data curating --------------- -dataset <- dataset0 %>% +death_TPP_ONS_reg_dereg <- dataset_death_TPP_ONS %>% mutate( TPP_death_reg = case_when( !is.na(TPP_death_date) & !is.na(last_registration_start_date) ~ @@ -102,22 +102,22 @@ dataset <- dataset0 %>% # a- ONS deaths distribution across time and inclusion criteria -ONS_death_year_by_reg <- dataset %>% +ONS_reg_tpp_year <- death_TPP_ONS_reg_dereg %>% filter(!is.na(ons_death_date)) %>% group_by(ons_death_year = year(ons_death_date)) %>% summarise( any_ons_death = rounding(n()), # any ONS death - ons_death_regist_before = rounding(sum(has_registration == TRUE, na.rm = TRUE)), # ONS + last registration before death - ons_registred_during = rounding(sum(ons_death_during_study == 
TRUE & has_registration == TRUE, na.rm = TRUE)), - tpp_any = rounding(sum(!is.na(TPP_death_date), na.rm = TRUE)), # any ONS death + any TPP death - tpp_reg_during_study = rounding(sum(tpp_death_during_study == TRUE & has_registration == TRUE, na.rm = TRUE)), # ONS + TPP + last dereg date after death + ons_death_regis = rounding(sum(has_registration == TRUE, na.rm = TRUE)), # ONS + last registration before death + ons_regis_and_during_study = rounding(sum(ons_death_during_study == TRUE & has_registration == TRUE, na.rm = TRUE)), + any_ons_tpp = rounding(sum(!is.na(TPP_death_date), na.rm = TRUE)), # any ONS death + any TPP death + ons_tpp_regis_and_during_study = rounding(sum(tpp_death_during_study == TRUE & has_registration == TRUE, na.rm = TRUE)), # ONS + TPP + last dereg date after death .groups = "drop" ) -write.csv(ONS_death_year_by_reg, here::here("output", "report", "reg_dereg_ONS_TPP", "ONS_death_year_by_reg.csv")) +write.csv(ONS_reg_tpp_year, here::here("output", "report", "reg_dereg_ONS_TPP", "ONS_reg_tpp_year.csv")) # b- Difference date of death - last registration date -ONS_death_reg_group<- dataset %>% +ONS_death_reg_group<- death_TPP_ONS_reg_dereg %>% filter(!is.na(ons_death_date)) %>% group_by(ons_death_year = year(ons_death_date), ons_death_reg_group) %>% summarise(n = rounding(n()), .groups = "drop") @@ -125,7 +125,7 @@ ONS_death_reg_group<- dataset %>% write.csv(ONS_death_reg_group, here::here("output", "report", "reg_dereg_ONS_TPP", "ONS_death_reg_group.csv")) # c- Difference deregistration - death -ons_death_dereg_group <- dataset %>% +ons_death_dereg_group <- death_TPP_ONS_reg_dereg %>% filter(!is.na(ons_death_date) & has_registration == TRUE) %>% group_by(ons_death_year = year(ons_death_date), ons_death_dereg_group) %>% summarise(n = rounding(n()), .groups = "drop") @@ -137,7 +137,7 @@ write.csv(ons_death_dereg_group, here::here("output", "report", "reg_dereg_ONS_T # a- NO # b- Difference date of death - last registration date 
-TPP_death_reg_group<- dataset %>% +TPP_death_reg_group<- death_TPP_ONS_reg_dereg %>% filter(!is.na(TPP_death_date)) %>% group_by(TPP_death_year = year(TPP_death_date), TPP_death_reg_group) %>% summarise(n = rounding(n()), .groups = "drop") @@ -145,7 +145,7 @@ TPP_death_reg_group<- dataset %>% write.csv(TPP_death_reg_group, here::here("output", "report", "reg_dereg_ONS_TPP", "TPP_death_reg_group.csv")) # c- Difference deregistration - death -TPP_death_dereg_group <- dataset %>% +TPP_death_dereg_group <- death_TPP_ONS_reg_dereg %>% filter(!is.na(TPP_death_date) & has_registration == TRUE) %>% group_by(TPP_death_year = year(TPP_death_date), TPP_death_dereg_group) %>% summarise(n = rounding(n()), .groups = "drop") @@ -158,7 +158,7 @@ write.csv(TPP_death_dereg_group, here::here("output", "report", "reg_dereg_ONS_T # granular table not for release # Granular table: daily difference deregistration - death for ONS -ons_death_dereg_daily <- dataset %>% +ons_death_dereg_daily <- death_TPP_ONS_reg_dereg %>% filter( !is.na(ons_death_date), !is.na(last_registration_end_date), @@ -177,12 +177,12 @@ ons_death_dereg_daily <- dataset %>% write.csv( ons_death_dereg_daily, - here::here("output", "report", "reg_dereg_ONS_TPP", "ons_death_dereg_daily.csv"), + here::here("output", "report", "reg_dereg_ONS_TPP", "nr_ons_death_dereg_daily.csv"), row.names = FALSE ) # Granular table: daily difference deregistration - death for TPP -TPP_death_dereg_daily <- dataset %>% +TPP_death_dereg_daily <- death_TPP_ONS_reg_dereg %>% filter( !is.na(TPP_death_date), has_registration == TRUE, @@ -202,6 +202,6 @@ TPP_death_dereg_daily <- dataset %>% write.csv( TPP_death_dereg_daily, - here::here("output", "report", "reg_dereg_ONS_TPP", "TPP_death_dereg_daily.csv"), + here::here("output", "report", "reg_dereg_ONS_TPP", "nr_TPP_death_dereg_daily.csv"), row.names = FALSE ) diff --git a/analysis/3_dataset_DoD_TPP_ONS_with_grace_period.R b/analysis/3_dataset_DoD_TPP_ONS_with_grace_period.R new file mode 
100644 index 0000000..7277013 --- /dev/null +++ b/analysis/3_dataset_DoD_TPP_ONS_with_grace_period.R @@ -0,0 +1,129 @@ +# Preliminaries ---- + +# Import libraries +library("tidyverse") +library("dtplyr") +library("lubridate") +library("glue") +library("here") + + +## Create output directory +output_dir <- here("output", "highly_sensitive") +# fs::dir_create(output_dir) + +# Import processed data ---- +dataset_death_TPP_ONS <- read_csv("output/highly_sensitive/dataset_death_TPP_ONS.csv.gz") %>% + mutate( + TPP_death_date = as.Date(TPP_death_date), + ons_death_date = as.Date(ons_death_date), + date_of_birth = as.Date(date_of_birth), + age_band = as.factor(age_band), + practice = as.factor(practice), + ons_death_place = as.factor(ons_death_place), + region = as.factor(region), + IMD_q10 = as.factor(IMD_q10), + ethnicity = as.factor(ethnicity), + sex = as.factor(sex), + last_registration_start_date = as.Date(last_registration_start_date), + last_registration_end_date = as.Date(last_registration_end_date) + ) %>% + filter( + has_registration == TRUE & # was registered at the beginning of the year the person died + any_death_during_study == TRUE # died between "2009-01-01" - "2025-06-06" + (deregistration date + 30 days) is after one date of death + ) + + +# ----------------------- +# Create variables +DoD_TPP_ONS_with_grace_period <- dataset_death_TPP_ONS %>% + mutate( + death_dereg_diff_TPP = case_when( + !is.na(last_registration_end_date) ~ as.Date(last_registration_end_date) - TPP_death_date, + TRUE ~ as.difftime(NA_real_, units = "days" + )), + death_dereg_diff_ONS = case_when( + !is.na(last_registration_end_date) ~ as.Date(last_registration_end_date) - ons_death_date, + TRUE ~ as.difftime(NA_real_, units = "days" + )), + DoD_min = pmin(TPP_death_date, ons_death_date, na.rm = TRUE), + diff_DoD = TPP_death_date - ons_death_date, + TPP_death = case_when(!is.na(TPP_death_date) ~ "yes", + TRUE ~ NA_character_), + ONS_death = case_when(!is.na(ons_death_date) ~ "yes", + 
TRUE ~ NA_character_), + rural_urb_recode = case_when( + rural_urban < 5 ~ "urban", + rural_urban >= 5 ~ "rural", + TRUE ~ NA_character_ + ), + year_pref_ONS = if_else(!is.na(ons_death_date), year(ons_death_date), year(TPP_death_date)), + year_month_pref_ONS = if_else(!is.na(ons_death_date), format(ons_death_date, "%Y-%m"), format(TPP_death_date, "%Y-%m") + ) + ) %>% + mutate( + ONS_or_TPP = case_when( + !is.na(ONS_death) & !is.na(TPP_death) ~ "ONS & TPP", + !is.na(ONS_death) & is.na(TPP_death) ~ "ONS", + !is.na(TPP_death) & is.na(ONS_death) ~ "TPP", + is.na(TPP_death) & is.na(ONS_death) ~ NA_character_ + ), + DoD_groups = case_when( + diff_DoD == 0 ~ "0", + + diff_DoD >= 1 & diff_DoD <= 7 ~ "1-7", + diff_DoD >= 8 & diff_DoD <= 31 ~ "8-31", + diff_DoD >= 32 ~ "32+", + + diff_DoD <= -1 & diff_DoD >= -7 ~ "-1 to -7", + diff_DoD <= -8 & diff_DoD >= -31 ~ "-8 to -31", + diff_DoD <= -32 ~ "-32+", + + TRUE ~ NA_character_ + ), + DoD_dereg_tpp = case_when( + death_dereg_diff_TPP == 0 ~ "0", + + death_dereg_diff_TPP >= 1 & death_dereg_diff_TPP <= 7 ~ "1-7", + death_dereg_diff_TPP >= 8 & death_dereg_diff_TPP <= 31 ~ "8-31", + death_dereg_diff_TPP >= 32 ~ "32+", + + death_dereg_diff_TPP <= -1 & death_dereg_diff_TPP >= -7 ~ "-1 to -7", + death_dereg_diff_TPP <= -8 & death_dereg_diff_TPP >= -31 ~ "-8 to -31", + death_dereg_diff_TPP <= -32 ~ "-32+", + + TRUE ~ NA_character_ + ), + DoD_dereg_TPP_group = case_when( + death_dereg_diff_TPP == 0 ~ "0", + + death_dereg_diff_TPP >= 1 & death_dereg_diff_TPP <= 7 ~ "1-7", + death_dereg_diff_TPP >= 8 & death_dereg_diff_TPP <= 31 ~ "8-31", + death_dereg_diff_TPP >= 32 ~ "32+", + + death_dereg_diff_TPP <= -1 & death_dereg_diff_TPP >= -7 ~ "-1 to -7", + death_dereg_diff_TPP <= -8 & death_dereg_diff_TPP >= -31 ~ "-8 to -31", + death_dereg_diff_TPP <= -32 ~ "-32+", + + TRUE ~ NA_character_ + ), + DoD_dereg_ONS_group = case_when( + death_dereg_diff_ONS == 0 ~ "0", + + death_dereg_diff_ONS >= 1 & death_dereg_diff_ONS <= 7 ~ "1-7", + 
death_dereg_diff_ONS >= 8 & death_dereg_diff_ONS <= 31 ~ "8-31", + death_dereg_diff_ONS >= 32 ~ "32+", + + death_dereg_diff_ONS <= -1 & death_dereg_diff_ONS >= -7 ~ "-1 to -7", + death_dereg_diff_ONS <= -8 & death_dereg_diff_ONS >= -31 ~ "-8 to -31", + death_dereg_diff_ONS <= -32 ~ "-32+", + + TRUE ~ NA_character_ + ) + ) # %>% + #filter( + # death_dereg_diff_ONS >= 0 | death_dereg_diff_TPP >= 0 + #) + +write.csv(DoD_TPP_ONS_with_grace_period, here::here("output", "highly_sensitive", "DoD_TPP_ONS_with_grace_period.csv")) + diff --git a/analysis/4_source_prop.R b/analysis/4_source_prop.R new file mode 100644 index 0000000..84e949a --- /dev/null +++ b/analysis/4_source_prop.R @@ -0,0 +1,243 @@ +# Preliminaries ---- + +# Import libraries +library("tidyverse") +library("dtplyr") +library("lubridate") +library("glue") +library("here") + +# Rounding function + +rounding <- function(vars) { + case_when(vars == 0 ~ 0, + vars > 7 ~ round(vars / 5) * 5) +} + + +## Create output directory +output_dir <- here("output", "report","by_source") +fs::dir_create(output_dir) + +# dataset with grace period +DoD_TPP_ONS_with_grace_period <- read_csv("output/highly_sensitive/DoD_TPP_ONS_with_grace_period.csv") + +# 1- ONS / TPP distribution without grace period -------------------------------------------- +DoD_TPP_ONS_without_grace_period <- DoD_TPP_ONS_with_grace_period %>% + mutate( + ons_without_grace_period = !is.na(ONS_death) & death_dereg_diff_ONS > 0, + tpp_without_grace_period = !is.na(TPP_death) & death_dereg_diff_TPP > 0 + ) %>% + mutate( + ONS_or_TPP_without_grace_period = case_when( + ons_without_grace_period & tpp_without_grace_period ~ "ONS & TPP", + ons_without_grace_period & !tpp_without_grace_period ~ "ONS", + tpp_without_grace_period & !ons_without_grace_period ~ "TPP", + TRUE ~ NA_character_ + ) + ) + +# Table by source (without grace period) +table_source_general_without_grace_period <- DoD_TPP_ONS_without_grace_period %>% + group_by(year_pref_ONS) %>% + mutate(total = rounding(n()))
%>% + group_by(year_pref_ONS, ONS_or_TPP_without_grace_period, total) %>% + summarise( + count = rounding(n()), + .groups = "drop" + ) %>% + mutate( + group_var = "general population", + group_value = "general population" + ) %>% + select(year_pref_ONS, ONS_or_TPP_without_grace_period, count, group_var, group_value) + + +table_source_by_subgroup_without_grace_period <- function(data, group_var) { + group_var_name <- deparse(substitute(group_var)) + + data %>% + filter(year_pref_ONS > 2008) %>% + group_by(year_pref_ONS, {{ group_var }}) %>% + mutate(total = n()) %>% + group_by(year_pref_ONS, {{ group_var }}, ONS_or_TPP_without_grace_period, total) %>% + summarise( + count = rounding(n()), + .groups = "drop" + ) %>% + mutate( + group_var = group_var_name, + group_value = as.character({{ group_var }}) + ) %>% + select(year_pref_ONS, ONS_or_TPP_without_grace_period, count, group_var, group_value) +} + + +table_source_age_without_grace_period <- table_source_by_subgroup_without_grace_period(DoD_TPP_ONS_without_grace_period, age_band) +table_source_ethnicity_without_grace_period <- table_source_by_subgroup_without_grace_period(DoD_TPP_ONS_without_grace_period, ethnicity) +table_source_region_without_grace_period <- table_source_by_subgroup_without_grace_period(DoD_TPP_ONS_without_grace_period, region) +table_source_place_without_grace_period <- table_source_by_subgroup_without_grace_period(DoD_TPP_ONS_without_grace_period, ons_death_place) +table_source_urban_without_grace_period <- table_source_by_subgroup_without_grace_period(DoD_TPP_ONS_without_grace_period, rural_urban) +table_source_IMD_without_grace_period <- table_source_by_subgroup_without_grace_period(DoD_TPP_ONS_without_grace_period, IMD_q10) +table_source_sex_without_grace_period <- table_source_by_subgroup_without_grace_period(DoD_TPP_ONS_without_grace_period, sex) + + +collate_death_source_without_grace_period <- bind_rows( + table_source_general_without_grace_period, + table_source_age_without_grace_period, 
+ table_source_ethnicity_without_grace_period, + table_source_region_without_grace_period, + table_source_place_without_grace_period, + table_source_urban_without_grace_period, + table_source_IMD_without_grace_period, + table_source_sex_without_grace_period +) + +write.csv(collate_death_source_without_grace_period, here::here("output", "report","by_source", "collate_death_source_without_grace_period.csv")) + + +# 2. ONS / TPP distribution with grace period --------------------------------- +table_source_general <- DoD_TPP_ONS_with_grace_period %>% + group_by(year_pref_ONS) %>% + mutate(total = rounding(n())) %>% + group_by(year_pref_ONS, ONS_or_TPP, total) %>% + summarise( + count = rounding(n()), + .groups = "drop" + ) %>% + mutate( + group_var = "general population", + group_value = "general population" + ) %>% + select(year_pref_ONS, ONS_or_TPP, count, group_var, group_value) + + + +#source by subgroup +table_source_by_subgroup <- function(data, group_var) { + group_var_name <- deparse(substitute(group_var)) + + data %>% + filter(year_pref_ONS > 2008) %>% + group_by(year_pref_ONS, {{ group_var }}) %>% + mutate(total = n()) %>% + group_by(year_pref_ONS, {{ group_var }}, ONS_or_TPP, total) %>% + summarise( + count = rounding(n()), + .groups = "drop" + ) %>% + mutate( + group_var = group_var_name, + group_value = as.character({{ group_var }}) + ) %>% + select(year_pref_ONS, ONS_or_TPP, count, group_var, group_value) +} + + +table_source_age <- table_source_by_subgroup(DoD_TPP_ONS_with_grace_period, age_band) +table_source_ethnicity <- table_source_by_subgroup(DoD_TPP_ONS_with_grace_period, ethnicity) +table_source_region <- table_source_by_subgroup(DoD_TPP_ONS_with_grace_period, region) +table_source_place <- table_source_by_subgroup(DoD_TPP_ONS_with_grace_period, ons_death_place) +table_source_urban <- table_source_by_subgroup(DoD_TPP_ONS_with_grace_period, rural_urban) +table_source_IMD <- table_source_by_subgroup(DoD_TPP_ONS_with_grace_period, IMD_q10) 
+table_source_sex <- table_source_by_subgroup(DoD_TPP_ONS_with_grace_period, sex) + + +collate_death_source_table_with_grace_period <- bind_rows( + table_source_general, + table_source_age, + table_source_ethnicity, + table_source_region, + table_source_place, + table_source_urban, + table_source_IMD, + table_source_sex +) + +write.csv(collate_death_source_table_with_grace_period, here::here("output", "report","by_source", "collate_death_source_with_grace_period.csv")) + +# Special periods + +# % by source 2020-2024 +table_source_general_20_24 <- DoD_TPP_ONS_with_grace_period %>% + filter(year_pref_ONS > 2019 & year_pref_ONS < 2025) %>% + group_by(ONS_or_TPP) %>% + summarise( + count = rounding(n()), + .groups = "drop" + ) %>% + mutate( + group_var = "general population", + group_value = "general population", + period = "2020-2024" + ) %>% + select(period, ONS_or_TPP, count, group_var, group_value) + + + +# % by source by month, from 2024-01 onwards (2025-06 excluded) +table_source_general_2025 <- DoD_TPP_ONS_with_grace_period %>% + filter(year_pref_ONS > 2023 & year_month_pref_ONS != "2025-06") %>% + group_by(year_month_pref_ONS) %>% + mutate(total = rounding(n())) %>% + group_by(year_month_pref_ONS, ONS_or_TPP, total) %>% + summarise( + count = rounding(n()), + .groups = "drop" + ) %>% + mutate( + group_var = "general population", + group_value = "general population" + ) %>% + rename( + period = year_month_pref_ONS + ) %>% + select(period, ONS_or_TPP, count, group_var, group_value) + +collate_death_source_table_spec_periods <- bind_rows(table_source_general_2025, table_source_general_20_24) + +write.csv(collate_death_source_table_spec_periods, here::here("output", "report","by_source", "collate_death_source_grace_per_spec_periods.csv")) + + +# 3.
TPP death codes ------------------------------------------------------------- +# number of deaths coded TPP +death_plus_coded_TPP <- DoD_TPP_ONS_with_grace_period %>% + mutate( + year_pref_ONS_TPP_plus_codes = case_when( + !is.na(ons_death_date) ~ year(ons_death_date), + !is.na(TPP_death_date) ~ year(TPP_death_date), + !is.na(death_coded_date) ~ year(death_coded_date) + ), + TPP_date_or_coded = case_when( + (!is.na(TPP_death_date) & + is.na(death_coded_date)) ~ "TPP - dated, not coded", + (is.na(TPP_death_date) & + !is.na(death_coded_date)) ~ "TPP - not dated, coded", + (!is.na(TPP_death_date) & + !is.na(death_coded_date)) ~ "TPP - dated and coded", + (is.na(TPP_death_date) & + is.na(death_coded_date)) ~ "TPP - neither dated nor coded" + ) + ) + + + +# Check how many have a date recorded / are coded by year +TPP_coded_date_death <- death_plus_coded_TPP %>% + group_by(year_pref_ONS_TPP_plus_codes) %>% + mutate(TPP_year = rounding(n())) %>% + group_by(TPP_date_or_coded, year_pref_ONS_TPP_plus_codes, TPP_year) %>% + summarise(TPP_subgroup = rounding(n())) %>% + mutate(pcent = TPP_subgroup / TPP_year * 100) + +write.csv(TPP_coded_date_death, here::here("output", "report","by_source", "TPP_coding_date_year.csv")) + + +# Table tpp - coded / date + ONS +ons_tpp_date_code <- death_plus_coded_TPP %>% + count(year_pref_ONS_TPP_plus_codes,TPP_date_or_coded, ONS_death) %>% + mutate( + n= rounding(n) + ) + +write.csv(ons_tpp_date_code, here::here("output", "report", "by_source", "ons_tpp_date_code_year.csv")) \ No newline at end of file diff --git a/analysis/5_DoD_diff.R b/analysis/5_DoD_diff.R new file mode 100644 index 0000000..fd69e7e --- /dev/null +++ b/analysis/5_DoD_diff.R @@ -0,0 +1,85 @@ +# Preliminaries ---- + +# Import libraries +library("tidyverse") +library("dtplyr") +library("lubridate") +library("glue") +library("here") + +# Rounding function + +rounding <- function(vars) { + case_when(vars == 0 ~ 0, + vars > 7 ~ round(vars / 5) * 5) +} + + +## Create output 
directory +output_dir <- here("output", "report","DoD_diff") +fs::dir_create(output_dir) + +# dataset with grace period +DoD_TPP_ONS_with_grace_period <- read_csv("output/highly_sensitive/DoD_TPP_ONS_with_grace_period.csv") + + + +##### 1- Diff DoD with grace period---------------------------------------------- + +# Table key indicators by year +table_DoD_general_with_grace_period <- DoD_TPP_ONS_with_grace_period %>% + filter(ONS_or_TPP == "ONS & TPP") %>% + group_by( + year_pref_ONS + ) %>% + mutate( + GP_ONS_annual_deaths = rounding(n()) + ) %>% + ungroup() %>% + group_by(year_pref_ONS, DoD_groups , GP_ONS_annual_deaths) %>% + summarise(count_by_group_DoD = rounding(n())) %>% + mutate( + group_var = "general population", + group_value = "general population" + ) + + +#Date diff by group +summarise_DoD_by_group <- function(data, group_var) { + group_var_name <- deparse(substitute(group_var)) + + data %>% + filter(ONS_or_TPP == "ONS & TPP") %>% + group_by(year_pref_ONS, {{ group_var }}) %>% + mutate(GP_ONS_annual_deaths = rounding(n())) %>% + ungroup() %>% + group_by(year_pref_ONS, {{ group_var }}, DoD_groups, GP_ONS_annual_deaths) %>% + summarise(count_by_group_DoD = rounding(n()), .groups = "drop") %>% + mutate( + group_var = group_var_name, + group_value = as.character({{ group_var }}) + ) %>% + select(year_pref_ONS, DoD_groups, GP_ONS_annual_deaths, count_by_group_DoD, group_var, group_value) +} + +#tables by group + +DoD_by_age <- summarise_DoD_by_group(DoD_TPP_ONS_with_grace_period, age_band) + +# DoD_by_practice <- summarise_DoD_by_group(DoD_TPP_ONS_with_grace_period, practice) + +DoD_by_ons_death_place <- summarise_DoD_by_group(DoD_TPP_ONS_with_grace_period, ons_death_place) + +DoD_by_region <- summarise_DoD_by_group(DoD_TPP_ONS_with_grace_period, region) + +DoD_by_rural_urban <- summarise_DoD_by_group(DoD_TPP_ONS_with_grace_period, rural_urban) + +DoD_by_IMD_q10 <- summarise_DoD_by_group(DoD_TPP_ONS_with_grace_period, IMD_q10) + +DoD_by_ethnicity <- 
summarise_DoD_by_group(DoD_TPP_ONS_with_grace_period, ethnicity) + +DoD_by_sex <- summarise_DoD_by_group(DoD_TPP_ONS_with_grace_period, sex) + +collate_DoD_diff_with_grace_period_table <- rbind(table_DoD_general_with_grace_period, DoD_by_age, DoD_by_rural_urban, DoD_by_ons_death_place, DoD_by_region, DoD_by_IMD_q10, DoD_by_ethnicity, DoD_by_sex) + +write.csv(collate_DoD_diff_with_grace_period_table, here::here("output", "report", "DoD_diff", "collate_DoD_diff_with_grace_period_table.csv")) diff --git a/analysis/practice_decile.R b/analysis/6_practice_by_source.R similarity index 92% rename from analysis/practice_decile.R rename to analysis/6_practice_by_source.R index 0deba75..09d94f4 100644 --- a/analysis/practice_decile.R +++ b/analysis/6_practice_by_source.R @@ -12,11 +12,11 @@ library(glue) library(here) ## Create output directory -output_dir <- here::here("output", "report", "deciles") +output_dir <- here::here("output", "report", "source_by_practice") fs::dir_create(output_dir) # Import processed data ---- -dataset0 <- read_csv("output/measures/measures_by_practice.csv") +measures_by_practice <- read_csv("output/highly_sensitive/measures_by_practice.csv") # Rounding functionn ---------------- @@ -29,7 +29,7 @@ rounding <- function(vars) { # Prepare data -------------------------------------------------------- -dataset_measure_practice <- dataset0 %>% +measure_by_practice_processed <- measures_by_practice %>% mutate( data_source = str_extract(measure, ".*(?=_mortality)"), year = as.integer(lubridate::year(interval_start)), @@ -55,7 +55,7 @@ percentiles <- as.integer(probs * 100) #Table----- # Practices with > 1000 and finite ratio -base <- dataset_measure_practice %>% +base <- measure_by_practice_processed %>% filter(global_denominator > 1000, is.finite(GP_global_perc)) # Count contributing practices per year ---- @@ -105,7 +105,7 @@ ggsave(fs::path(output_dir, glue("practice_deciles_TPP_perc.png")), plot = pract #------------------------------------ # Table: 
practices with 0 deaths and less than 1000 people -pract_deaths_population <- dataset_measure_practice %>% +pract_deaths_population <- measure_by_practice_processed %>% group_by(year) %>% summarise( practices_total = rounding(n_distinct(practice)), diff --git a/Final-visualizations.R b/analysis/Final-visualizations.R similarity index 100% rename from Final-visualizations.R rename to analysis/Final-visualizations.R diff --git a/analysis/Table_DoD_without_grace_period.R b/analysis/Table_DoD_without_grace_period.R deleted file mode 100644 index 9f899a2..0000000 --- a/analysis/Table_DoD_without_grace_period.R +++ /dev/null @@ -1,326 +0,0 @@ -# Preliminaries ---- - -# Import libraries -library("tidyverse") -library("dtplyr") -library("lubridate") -library("glue") -library("here") - - -## Create output directory -output_dir <- here("output", "report", "without_grace_period") -fs::dir_create(output_dir) - -# Import processed data ---- -dataset0 <- read_csv("output/dataset_death_date_diff.csv.gz") %>% - mutate( - TPP_death_date = as.Date(TPP_death_date), - ons_death_date = as.Date(ons_death_date), - date_of_birth = as.Date(date_of_birth), - age_band = as.factor(age_band), - practice = as.factor(practice), - ons_death_place = as.factor(ons_death_place), - region = as.factor(region), - IMD_q10 = as.factor(IMD_q10), - ethnicity = as.factor(ethnicity), - sex = as.factor(sex), - last_registration_start_date = as.Date(last_registration_start_date), - last_registration_end_date = as.Date(last_registration_end_date) - ) %>% - filter( - has_registration == TRUE & # was registered at the beginning of the year the person died - any_death_during_study == TRUE # died between "2009-01-01" - "2025-06-06" + (deregistration date + 30 days) is after one date of death - ) - -# Rounding function - -rounding <- function(vars) { - case_when(vars == 0 ~ 0, - vars > 7 ~ round(vars / 5) * 5) -} - -# ----------------------- -# Create variables -DoD_diff_without_grace_period_dataset <- dataset0 %>% 
- mutate( - death_dereg_diff_TPP = case_when( - !is.na(last_registration_end_date) ~ as.Date(last_registration_end_date) - TPP_death_date, - TRUE ~ as.difftime(NA_real_, units = "days" - )), - death_dereg_diff_ONS = case_when( - !is.na(last_registration_end_date) ~ as.Date(last_registration_end_date) - ons_death_date, - TRUE ~ as.difftime(NA_real_, units = "days" - )), - DoD_min = pmin(TPP_death_date, ons_death_date, na.rm = TRUE), - diff_DoD = TPP_death_date - ons_death_date, - TPP_death = case_when(!is.na(TPP_death_date) ~ "yes", - TRUE ~ NA_character_), - ONS_death = case_when(!is.na(ons_death_date) ~ "yes", - TRUE ~ NA_character_), - rural_urb_recode = case_when( - rural_urban < 5 ~ "urban", - rural_urban >= 5 ~ "rural", - TRUE ~ NA_character_ - ), - year_pref_ONS = if_else(!is.na(ons_death_date), year(ons_death_date), year(TPP_death_date)), - year_month_pref_ONS = if_else(!is.na(ons_death_date), format(ons_death_date, "%Y-%m"), format(TPP_death_date, "%Y-%m") - ) - ) %>% - mutate( - ONS_or_TPP = case_when( - !is.na(ONS_death) & !is.na(TPP_death) ~ "ONS & TPP", - !is.na(ONS_death) & is.na(TPP_death) ~ "ONS", - !is.na(TPP_death) & is.na(ONS_death) ~ "TPP", - is.na(TPP_death) & is.na(ONS_death) ~ NA_character_ - ), - DoD_groups = case_when( - diff_DoD == 0 ~ "0", - - diff_DoD >= 1 & diff_DoD <= 7 ~ "1-7", - diff_DoD >= 8 & diff_DoD <= 31 ~ "8-31", - diff_DoD >= 32 ~ "32+", - - diff_DoD <= -1 & diff_DoD >= -7 ~ "-1 to -7", - diff_DoD <= -8 & diff_DoD >= -31 ~ "-8 to -31", - diff_DoD <= -32 ~ "-32+", - - TRUE ~ NA_character_ - ), - DoD_dereg_tpp = case_when( - death_dereg_diff_TPP == 0 ~ "0", - - death_dereg_diff_TPP >= 1 & death_dereg_diff_TPP <= 7 ~ "1-7", - death_dereg_diff_TPP >= 8 & death_dereg_diff_TPP <= 31 ~ "8-31", - death_dereg_diff_TPP >= 32 ~ "32+", - - death_dereg_diff_TPP <= -1 & death_dereg_diff_TPP >= -7 ~ "-1 to -7", - death_dereg_diff_TPP <= -8 & death_dereg_diff_TPP >= -31 ~ "-8 to -31", - death_dereg_diff_TPP <= -32 ~ "-32+", - - TRUE ~ 
NA_character_ - ), - DoD_dereg_TPP_group = case_when( - death_dereg_diff_TPP == 0 ~ "0", - - death_dereg_diff_TPP >= 1 & death_dereg_diff_TPP <= 7 ~ "1-7", - death_dereg_diff_TPP >= 8 & death_dereg_diff_TPP <= 31 ~ "8-31", - death_dereg_diff_TPP >= 32 ~ "32+", - - death_dereg_diff_TPP <= -1 & death_dereg_diff_TPP >= -7 ~ "-1 to -7", - death_dereg_diff_TPP <= -8 & death_dereg_diff_TPP >= -31 ~ "-8 to -31", - death_dereg_diff_TPP <= -32 ~ "-32+", - - TRUE ~ NA_character_ - ), - DoD_dereg_ONS_group = case_when( - death_dereg_diff_ONS == 0 ~ "0", - - death_dereg_diff_ONS >= 1 & death_dereg_diff_ONS <= 7 ~ "1-7", - death_dereg_diff_ONS >= 8 & death_dereg_diff_ONS <= 31 ~ "8-31", - death_dereg_diff_ONS >= 32 ~ "32+", - - death_dereg_diff_ONS <= -1 & death_dereg_diff_ONS >= -7 ~ "-1 to -7", - death_dereg_diff_ONS <= -8 & death_dereg_diff_ONS >= -31 ~ "-8 to -31", - death_dereg_diff_ONS <= -32 ~ "-32+", - - TRUE ~ NA_character_ - ) - ) %>% - filter( - death_dereg_diff_ONS >= 0 | death_dereg_diff_TPP >= 0 - ) - - - - - -##### 1- Diff DoD---------------------------------------------- - -# Table key indicators by year -table_DoD_general_without_grace_period_dataset <- DoD_diff_without_grace_period_dataset %>% - filter(ONS_or_TPP == "ONS & TPP") %>% - group_by( - year_pref_ONS - ) %>% - mutate( - GP_ONS_annual_deaths = rounding(n()) - ) %>% - ungroup() %>% - group_by(year_pref_ONS, DoD_groups , GP_ONS_annual_deaths) %>% - summarise(count_by_group_DoD = rounding(n())) %>% - mutate( - group_var = "general population", - group_value = "general population" - ) - - -#Date diff by group -summarise_DoD_by_group <- function(data, group_var) { - group_var_name <- deparse(substitute(group_var)) - - data %>% - filter(ONS_or_TPP == "ONS & TPP") %>% - group_by(year_pref_ONS, {{ group_var }}) %>% - mutate(GP_ONS_annual_deaths = rounding(n())) %>% - ungroup() %>% - group_by(year_pref_ONS, {{ group_var }}, DoD_groups, GP_ONS_annual_deaths) %>% - summarise(count_by_group_DoD = rounding(n()), 
.groups = "drop") %>% - mutate( - group_var = group_var_name, - group_value = as.character({{ group_var }}) - ) %>% - select(year_pref_ONS, DoD_groups, GP_ONS_annual_deaths, count_by_group_DoD, group_var, group_value) -} - -#tables by group - -DoD_by_age <- summarise_DoD_by_group(DoD_diff_without_grace_period_dataset, age_band) - -# DoD_by_practice <- summarise_DoD_by_group(DoD_diff_without_grace_period_dataset, practice) - -DoD_by_ons_death_place <- summarise_DoD_by_group(DoD_diff_without_grace_period_dataset, ons_death_place) - -DoD_by_region <- summarise_DoD_by_group(DoD_diff_without_grace_period_dataset, region) - -DoD_by_rural_urban <- summarise_DoD_by_group(DoD_diff_without_grace_period_dataset, rural_urban) - -DoD_by_IMD_q10 <- summarise_DoD_by_group(DoD_diff_without_grace_period_dataset, IMD_q10) - -DoD_by_ethnicity <- summarise_DoD_by_group(DoD_diff_without_grace_period_dataset, ethnicity) - -DoD_by_sex <- summarise_DoD_by_group(DoD_diff_without_grace_period_dataset, sex) - -collate_DoD_diff_without_grace_period_table <- rbind(table_DoD_general_without_grace_period_dataset, DoD_by_age, DoD_by_rural_urban, DoD_by_ons_death_place, DoD_by_region, DoD_by_IMD_q10, DoD_by_ethnicity, DoD_by_sex) - -write.csv(collate_DoD_diff_without_grace_period_table, here::here("output", "report", "without_grace_period", "collate_DoD_diff_without_grace_period_table.csv")) - -# 2- Table by source -------------------------------------------------------------------------------------- -table_source_general_without_grace_period <- DoD_diff_without_grace_period_dataset %>% - group_by(year_pref_ONS) %>% - mutate(total = rounding(n())) %>% - group_by(year_pref_ONS, ONS_or_TPP, total) %>% - summarise( - count = rounding(n()), - .groups = "drop" - ) %>% - mutate( - group_var = "general population", - group_value = "general population" - ) %>% - select(year_pref_ONS, ONS_or_TPP, count, group_var, group_value) - - -table_source_by_subgroup <- function(data, group_var) { - group_var_name <- 
deparse(substitute(group_var)) - - data %>% - filter(year_pref_ONS > 2008) %>% - group_by(year_pref_ONS, {{ group_var }}) %>% - mutate(total = n()) %>% - group_by(year_pref_ONS, {{ group_var }}, ONS_or_TPP, total) %>% - summarise( - count = rounding(n()), - .groups = "drop" - ) %>% - mutate( - group_var = group_var_name, - group_value = as.character({{ group_var }}) - ) %>% - select(year_pref_ONS, ONS_or_TPP, count, group_var, group_value) -} - - -table_source_age <- table_source_by_subgroup(DoD_diff_without_grace_period_dataset, age_band) -table_source_ethnicity <- table_source_by_subgroup(DoD_diff_without_grace_period_dataset, ethnicity) -table_source_region <- table_source_by_subgroup(DoD_diff_without_grace_period_dataset, region) -table_source_place <- table_source_by_subgroup(DoD_diff_without_grace_period_dataset, ons_death_place) -table_source_urban <- table_source_by_subgroup(DoD_diff_without_grace_period_dataset, rural_urban) -table_source_IMD <- table_source_by_subgroup(DoD_diff_without_grace_period_dataset, IMD_q10) -table_source_sex <- table_source_by_subgroup(DoD_diff_without_grace_period_dataset, sex) - - -collate_death_source_table_without_grace_period <- bind_rows( - table_source_general_without_grace_period, - table_source_age, - table_source_ethnicity, - table_source_region, - table_source_place, - table_source_urban, - table_source_IMD, - table_source_sex -) - -write.csv(collate_death_source_table_without_grace_period, here::here("output", "report", "without_grace_period", "collate_death_source_table_without_grace_period.csv")) - - -# % by source 2020-2024 -table_source_general_20_24_without_grace_period <- DoD_diff_without_grace_period_dataset %>% - filter(year_pref_ONS > 2019 & year_pref_ONS < 2025) %>% - group_by(ONS_or_TPP) %>% - summarise( - count = rounding(n()), - .groups = "drop" - ) %>% - mutate( - group_var = "general population", - group_value = "general population", - period = "2020-2024" - ) %>% - select(period, ONS_or_TPP, count, 
group_var, group_value) - - - -# % by source 01-01-2025 to 06-06-2024 by month -table_source_general_2025_without_grace_period <- DoD_diff_without_grace_period_dataset %>% - filter(year_pref_ONS > 2023 & year_month_pref_ONS != "2025-06") %>% - group_by(year_month_pref_ONS) %>% - mutate(total = rounding(n())) %>% - group_by(year_month_pref_ONS, ONS_or_TPP, total) %>% - summarise( - count = rounding(n()), - .groups = "drop" - ) %>% - mutate( - group_var = "general population", - group_value = "general population" - ) %>% - rename( - period = year_month_pref_ONS - ) %>% - select(period, ONS_or_TPP, count, group_var, group_value) - -collate_death_source_table_spec_periods_without_grace_period <- bind_rows(table_source_general_2025_without_grace_period, table_source_general_20_24_without_grace_period) - -write.csv(collate_death_source_table_spec_periods_without_grace_period, here::here("output", "report", "without_grace_period", "collate_death_source_table_spec_periods_without_grace_period.csv")) - -# Diff deregistration - death --------------------------------- -by_year_dereg_DoD_diff_without_grace_period <- DoD_diff_without_grace_period_dataset %>% - select( - year_pref_ONS, - DoD_dereg_ONS_group, - DoD_dereg_TPP_group - ) %>% - pivot_longer( - cols = c(DoD_dereg_ONS_group, DoD_dereg_TPP_group), - names_to = "source", - values_to = "dereg_group" - ) %>% - mutate( - source = case_when( - source == "DoD_dereg_ONS_group" ~ "ONS", - source == "DoD_dereg_TPP_group" ~ "TPP", - TRUE ~ source - ) - ) %>% - group_by(year_pref_ONS, source, dereg_group) %>% - summarise(n = rounding(n()), .groups = "drop") %>% - group_by(year_pref_ONS, source) %>% - mutate( - total_year_source = sum(n), - prop = n / total_year_source - ) %>% - ungroup() - -write.csv(by_year_dereg_DoD_diff_without_grace_period, here::here("output", "report", "without_grace_period", "by_year_dereg_DoD_diff_without_grace_period.csv")) - diff --git a/analysis/dataset_definition.py 
b/analysis/dataset_def/dataset_definition.py similarity index 98% rename from analysis/dataset_definition.py rename to analysis/dataset_def/dataset_definition.py index 1b321fb..42abaec 100644 --- a/analysis/dataset_definition.py +++ b/analysis/dataset_def/dataset_definition.py @@ -194,7 +194,7 @@ # Dummy data configuration dataset.configure_dummy_data(population_size=10000, timeout=180, additional_population_constraint=( - dataset.TPP_death_date.is_on_or_between("2020-01-01","2020-05-01") & - dataset.death_coded_date.is_on_or_between("2020-01-01","2020-05-01") + dataset.TPP_death_date.is_on_or_between("2009-01-01","2025-05-01") & + dataset.death_coded_date.is_on_or_between("2019-01-01","2025-05-01") + ) ) - ) \ No newline at end of file diff --git a/analysis/measure_def.py b/analysis/dataset_def/measure_def.py similarity index 100% rename from analysis/measure_def.py rename to analysis/dataset_def/measure_def.py diff --git a/analysis/measure_practice.py b/analysis/dataset_def/measure_practice.py similarity index 100% rename from analysis/measure_practice.py rename to analysis/dataset_def/measure_practice.py diff --git a/analysis/Table_DoD.R b/analysis/old/Table_DoD.R similarity index 100% rename from analysis/Table_DoD.R rename to analysis/old/Table_DoD.R diff --git a/analysis/old/Table_DoD_without_grace_period.R b/analysis/old/Table_DoD_without_grace_period.R new file mode 100644 index 0000000..17b8f6c --- /dev/null +++ b/analysis/old/Table_DoD_without_grace_period.R @@ -0,0 +1,327 @@ +# Preliminaries ---- + +# Import libraries +library("tidyverse") +library("dtplyr") +library("lubridate") +library("glue") +library("here") + + +## Create output directory +output_dir <- here("output", "highly_sensitive") +# fs::dir_create(output_dir) + +# Import processed data ---- +dataset_death_TPP_ONS <- read_csv("output/highly_sensitive/dataset_death_TPP_ONS.csv.gz") %>% + mutate( + TPP_death_date = as.Date(TPP_death_date), + ons_death_date = as.Date(ons_death_date), + 
date_of_birth = as.Date(date_of_birth), + age_band = as.factor(age_band), + practice = as.factor(practice), + ons_death_place = as.factor(ons_death_place), + region = as.factor(region), + IMD_q10 = as.factor(IMD_q10), + ethnicity = as.factor(ethnicity), + sex = as.factor(sex), + last_registration_start_date = as.Date(last_registration_start_date), + last_registration_end_date = as.Date(last_registration_end_date) + ) %>% + filter( + has_registration == TRUE & # was registered at the beginning of the year the person died + any_death_during_study == TRUE # died between "2009-01-01" - "2025-06-06" + (deregistration date + 30 days) is after one date of death + ) + +# Rounding function + +rounding <- function(vars) { + case_when(vars == 0 ~ 0, + vars > 7 ~ round(vars / 5) * 5) +} + +# ----------------------- +# Create variables +DoD_TPP_ONS_with_grace_period <- dataset_death_TPP_ONS %>% + mutate( + death_dereg_diff_TPP = case_when( + !is.na(last_registration_end_date) ~ as.Date(last_registration_end_date) - TPP_death_date, + TRUE ~ as.difftime(NA_real_, units = "days" + )), + death_dereg_diff_ONS = case_when( + !is.na(last_registration_end_date) ~ as.Date(last_registration_end_date) - ons_death_date, + TRUE ~ as.difftime(NA_real_, units = "days" + )), + DoD_min = pmin(TPP_death_date, ons_death_date, na.rm = TRUE), + diff_DoD = TPP_death_date - ons_death_date, + TPP_death = case_when(!is.na(TPP_death_date) ~ "yes", + TRUE ~ NA_character_), + ONS_death = case_when(!is.na(ons_death_date) ~ "yes", + TRUE ~ NA_character_), + rural_urb_recode = case_when( + rural_urban < 5 ~ "urban", + rural_urban >= 5 ~ "rural", + TRUE ~ NA_character_ + ), + year_pref_ONS = if_else(!is.na(ons_death_date), year(ons_death_date), year(TPP_death_date)), + year_month_pref_ONS = if_else(!is.na(ons_death_date), format(ons_death_date, "%Y-%m"), format(TPP_death_date, "%Y-%m") + ) + ) %>% + mutate( + ONS_or_TPP = case_when( + !is.na(ONS_death) & !is.na(TPP_death) ~ "ONS & TPP", + !is.na(ONS_death) & 
is.na(TPP_death) ~ "ONS", + !is.na(TPP_death) & is.na(ONS_death) ~ "TPP", + is.na(TPP_death) & is.na(ONS_death) ~ NA_character_ + ), + DoD_groups = case_when( + diff_DoD == 0 ~ "0", + + diff_DoD >= 1 & diff_DoD <= 7 ~ "1-7", + diff_DoD >= 8 & diff_DoD <= 31 ~ "8-31", + diff_DoD >= 32 ~ "32+", + + diff_DoD <= -1 & diff_DoD >= -7 ~ "-1 to -7", + diff_DoD <= -8 & diff_DoD >= -31 ~ "-8 to -31", + diff_DoD <= -32 ~ "-32+", + + TRUE ~ NA_character_ + ), + DoD_dereg_tpp = case_when( + death_dereg_diff_TPP == 0 ~ "0", + + death_dereg_diff_TPP >= 1 & death_dereg_diff_TPP <= 7 ~ "1-7", + death_dereg_diff_TPP >= 8 & death_dereg_diff_TPP <= 31 ~ "8-31", + death_dereg_diff_TPP >= 32 ~ "32+", + + death_dereg_diff_TPP <= -1 & death_dereg_diff_TPP >= -7 ~ "-1 to -7", + death_dereg_diff_TPP <= -8 & death_dereg_diff_TPP >= -31 ~ "-8 to -31", + death_dereg_diff_TPP <= -32 ~ "-32+", + + TRUE ~ NA_character_ + ), + DoD_dereg_TPP_group = case_when( + death_dereg_diff_TPP == 0 ~ "0", + + death_dereg_diff_TPP >= 1 & death_dereg_diff_TPP <= 7 ~ "1-7", + death_dereg_diff_TPP >= 8 & death_dereg_diff_TPP <= 31 ~ "8-31", + death_dereg_diff_TPP >= 32 ~ "32+", + + death_dereg_diff_TPP <= -1 & death_dereg_diff_TPP >= -7 ~ "-1 to -7", + death_dereg_diff_TPP <= -8 & death_dereg_diff_TPP >= -31 ~ "-8 to -31", + death_dereg_diff_TPP <= -32 ~ "-32+", + + TRUE ~ NA_character_ + ), + DoD_dereg_ONS_group = case_when( + death_dereg_diff_ONS == 0 ~ "0", + + death_dereg_diff_ONS >= 1 & death_dereg_diff_ONS <= 7 ~ "1-7", + death_dereg_diff_ONS >= 8 & death_dereg_diff_ONS <= 31 ~ "8-31", + death_dereg_diff_ONS >= 32 ~ "32+", + + death_dereg_diff_ONS <= -1 & death_dereg_diff_ONS >= -7 ~ "-1 to -7", + death_dereg_diff_ONS <= -8 & death_dereg_diff_ONS >= -31 ~ "-8 to -31", + death_dereg_diff_ONS <= -32 ~ "-32+", + + TRUE ~ NA_character_ + ) + ) # %>% + #filter( + # death_dereg_diff_ONS >= 0 | death_dereg_diff_TPP >= 0 + #) + +write.csv(DoD_TPP_ONS_with_grace_period, here::here("output", "report", 
"without_grace_period", "DoD_TPP_ONS_with_grace_period.csv")) + + + + +# ##### 1- Diff DoD---------------------------------------------- +# +# # Table key indicators by year +# table_DoD_general_without_grace_period_dataset <- DoD_diff_without_grace_period_dataset %>% +# filter(ONS_or_TPP == "ONS & TPP") %>% +# group_by( +# year_pref_ONS +# ) %>% +# mutate( +# GP_ONS_annual_deaths = rounding(n()) +# ) %>% +# ungroup() %>% +# group_by(year_pref_ONS, DoD_groups , GP_ONS_annual_deaths) %>% +# summarise(count_by_group_DoD = rounding(n())) %>% +# mutate( +# group_var = "general population", +# group_value = "general population" +# ) +# +# +# #Date diff by group +# summarise_DoD_by_group <- function(data, group_var) { +# group_var_name <- deparse(substitute(group_var)) +# +# data %>% +# filter(ONS_or_TPP == "ONS & TPP") %>% +# group_by(year_pref_ONS, {{ group_var }}) %>% +# mutate(GP_ONS_annual_deaths = rounding(n())) %>% +# ungroup() %>% +# group_by(year_pref_ONS, {{ group_var }}, DoD_groups, GP_ONS_annual_deaths) %>% +# summarise(count_by_group_DoD = rounding(n()), .groups = "drop") %>% +# mutate( +# group_var = group_var_name, +# group_value = as.character({{ group_var }}) +# ) %>% +# select(year_pref_ONS, DoD_groups, GP_ONS_annual_deaths, count_by_group_DoD, group_var, group_value) +# } +# +# #tables by group +# +# DoD_by_age <- summarise_DoD_by_group(DoD_diff_without_grace_period_dataset, age_band) +# +# # DoD_by_practice <- summarise_DoD_by_group(DoD_diff_without_grace_period_dataset, practice) +# +# DoD_by_ons_death_place <- summarise_DoD_by_group(DoD_diff_without_grace_period_dataset, ons_death_place) +# +# DoD_by_region <- summarise_DoD_by_group(DoD_diff_without_grace_period_dataset, region) +# +# DoD_by_rural_urban <- summarise_DoD_by_group(DoD_diff_without_grace_period_dataset, rural_urban) +# +# DoD_by_IMD_q10 <- summarise_DoD_by_group(DoD_diff_without_grace_period_dataset, IMD_q10) +# +# DoD_by_ethnicity <- 
summarise_DoD_by_group(DoD_diff_without_grace_period_dataset, ethnicity) +# +# DoD_by_sex <- summarise_DoD_by_group(DoD_diff_without_grace_period_dataset, sex) +# +# collate_DoD_diff_without_grace_period_table <- rbind(table_DoD_general_without_grace_period_dataset, DoD_by_age, DoD_by_rural_urban, DoD_by_ons_death_place, DoD_by_region, DoD_by_IMD_q10, DoD_by_ethnicity, DoD_by_sex) +# +# write.csv(collate_DoD_diff_without_grace_period_table, here::here("output", "report", "without_grace_period", "collate_DoD_diff_without_grace_period_table.csv")) +# +# # 2- Table by source -------------------------------------------------------------------------------------- +# table_source_general_without_grace_period <- DoD_diff_without_grace_period_dataset %>% +# group_by(year_pref_ONS) %>% +# mutate(total = rounding(n())) %>% +# group_by(year_pref_ONS, ONS_or_TPP, total) %>% +# summarise( +# count = rounding(n()), +# .groups = "drop" +# ) %>% +# mutate( +# group_var = "general population", +# group_value = "general population" +# ) %>% +# select(year_pref_ONS, ONS_or_TPP, count, group_var, group_value) +# +# +# table_source_by_subgroup <- function(data, group_var) { +# group_var_name <- deparse(substitute(group_var)) +# +# data %>% +# filter(year_pref_ONS > 2008) %>% +# group_by(year_pref_ONS, {{ group_var }}) %>% +# mutate(total = n()) %>% +# group_by(year_pref_ONS, {{ group_var }}, ONS_or_TPP, total) %>% +# summarise( +# count = rounding(n()), +# .groups = "drop" +# ) %>% +# mutate( +# group_var = group_var_name, +# group_value = as.character({{ group_var }}) +# ) %>% +# select(year_pref_ONS, ONS_or_TPP, count, group_var, group_value) +# } +# +# +# table_source_age <- table_source_by_subgroup(DoD_diff_without_grace_period_dataset, age_band) +# table_source_ethnicity <- table_source_by_subgroup(DoD_diff_without_grace_period_dataset, ethnicity) +# table_source_region <- table_source_by_subgroup(DoD_diff_without_grace_period_dataset, region) +# table_source_place <- 
table_source_by_subgroup(DoD_diff_without_grace_period_dataset, ons_death_place) +# table_source_urban <- table_source_by_subgroup(DoD_diff_without_grace_period_dataset, rural_urban) +# table_source_IMD <- table_source_by_subgroup(DoD_diff_without_grace_period_dataset, IMD_q10) +# table_source_sex <- table_source_by_subgroup(DoD_diff_without_grace_period_dataset, sex) +# +# +# collate_death_source_table_without_grace_period <- bind_rows( +# table_source_general_without_grace_period, +# table_source_age, +# table_source_ethnicity, +# table_source_region, +# table_source_place, +# table_source_urban, +# table_source_IMD, +# table_source_sex +# ) +# +# write.csv(collate_death_source_table_without_grace_period, here::here("output", "report", "without_grace_period", "collate_death_source_table_without_grace_period.csv")) +# +# +# # % by source 2020-2024 +# table_source_general_20_24_without_grace_period <- DoD_diff_without_grace_period_dataset %>% +# filter(year_pref_ONS > 2019 & year_pref_ONS < 2025) %>% +# group_by(ONS_or_TPP) %>% +# summarise( +# count = rounding(n()), +# .groups = "drop" +# ) %>% +# mutate( +# group_var = "general population", +# group_value = "general population", +# period = "2020-2024" +# ) %>% +# select(period, ONS_or_TPP, count, group_var, group_value) +# +# +# +# # % by source 01-01-2025 to 06-06-2024 by month +# table_source_general_2025_without_grace_period <- DoD_diff_without_grace_period_dataset %>% +# filter(year_pref_ONS > 2023 & year_month_pref_ONS != "2025-06") %>% +# group_by(year_month_pref_ONS) %>% +# mutate(total = rounding(n())) %>% +# group_by(year_month_pref_ONS, ONS_or_TPP, total) %>% +# summarise( +# count = rounding(n()), +# .groups = "drop" +# ) %>% +# mutate( +# group_var = "general population", +# group_value = "general population" +# ) %>% +# rename( +# period = year_month_pref_ONS +# ) %>% +# select(period, ONS_or_TPP, count, group_var, group_value) +# +# collate_death_source_table_spec_periods_without_grace_period <- 
bind_rows(table_source_general_2025_without_grace_period, table_source_general_20_24_without_grace_period) +# +# write.csv(collate_death_source_table_spec_periods_without_grace_period, here::here("output", "report", "without_grace_period", "collate_death_source_table_spec_periods_without_grace_period.csv")) +# +# # Diff deregistration - death --------------------------------- +# by_year_dereg_DoD_diff_without_grace_period <- DoD_diff_without_grace_period_dataset %>% +# select( +# year_pref_ONS, +# DoD_dereg_ONS_group, +# DoD_dereg_TPP_group +# ) %>% +# pivot_longer( +# cols = c(DoD_dereg_ONS_group, DoD_dereg_TPP_group), +# names_to = "source", +# values_to = "dereg_group" +# ) %>% +# mutate( +# source = case_when( +# source == "DoD_dereg_ONS_group" ~ "ONS", +# source == "DoD_dereg_TPP_group" ~ "TPP", +# TRUE ~ source +# ) +# ) %>% +# group_by(year_pref_ONS, source, dereg_group) %>% +# summarise(n = rounding(n()), .groups = "drop") %>% +# group_by(year_pref_ONS, source) %>% +# mutate( +# total_year_source = sum(n), +# prop = n / total_year_source +# ) %>% +# ungroup() +# +# write.csv(by_year_dereg_DoD_diff_without_grace_period, here::here("output", "report", "without_grace_period", "by_year_dereg_DoD_diff_without_grace_period.csv")) +# diff --git a/analysis/Table_measure_rate.R b/analysis/old/Table_measure_rate.R similarity index 100% rename from analysis/Table_measure_rate.R rename to analysis/old/Table_measure_rate.R diff --git a/project.yaml b/project.yaml index 563fa56..b8a53bb 100644 --- a/project.yaml +++ b/project.yaml @@ -2,80 +2,82 @@ version: '4.0' actions: #Dataset for diff time - dataset_death_date_diff: - run: ehrql:v1 generate-dataset analysis/dataset_definition.py --output output/dataset_death_date_diff.csv.gz + dataset_death_TPP_ONS: + run: ehrql:v1 generate-dataset analysis/dataset_def/dataset_definition.py --output output/highly_sensitive/dataset_death_TPP_ONS.csv.gz outputs: highly_sensitive: - dataset: output/dataset_death_date_diff.csv.gz + 
dataset: output/highly_sensitive/dataset_death_TPP_ONS.csv.gz summary_stat: - run: r:v2 analysis/summary_stats.R - --output output/DoD/*.csv - needs: [dataset_death_date_diff] + run: r:v2 analysis/1_summary_stats.R + --output output/summary_stats/*.csv + needs: [dataset_death_TPP_ONS] outputs: moderately_sensitive: - tables: output/DoD/*.csv + tables: output/summary_stats/*.csv reg_dereg_ONS_TPP: - run: r:v2 analysis/tables_ONS_TPP_reg_dereg.R + run: r:v2 analysis/2_tables_ONS_TPP_reg_dereg.R --output output/report/reg_dereg_ONS_TPP/*.csv - needs: [dataset_death_date_diff] + needs: [dataset_death_TPP_ONS] outputs: moderately_sensitive: csv: output/report/reg_dereg_ONS_TPP/*.csv - table_DoD: - run: r:v2 analysis/Table_DoD.R - --output output/report/table_DoD/*.csv - needs: [dataset_death_date_diff] + death_source: + run: r:v2 analysis/4_source_prop.R + --output output/report/by_source/*.csv + needs: [dataset_death_TPP_ONS] outputs: moderately_sensitive: - csv: output/report/table_DoD/*.csv + csv: output/report/by_source/*.csv - - visualization: - run: r:v2 Final-visualizations.R - needs: [table_DoD] - outputs: - moderately_sensitive: - csv: output/final_visualization/*.csv - png: output/final_visualization/*.png - - table_DoD_without_grace_period: - run: r:v2 analysis/Table_DoD_without_grace_period.R - --output output/report/without_grace_period/*.csv - needs: [dataset_death_date_diff] + DoD_difference: + run: r:v2 analysis/5_DoD_diff.R + --output output/report/DoD_diff/*.csv + needs: [dataset_death_TPP_ONS] outputs: moderately_sensitive: - csv: output/report/without_grace_period/*.csv + csv: output/report/DoD_diff/*.csv # Measures - measures_overall: - run: ehrql:v1 generate-measures analysis/measure_def.py - --output output/measures/measures_overall.csv - outputs: - moderately_sensitive: - measure_csv: output/measures/measures_overall.csv + # measures_overall: + # run: ehrql:v1 generate-measures analysis/measure_def.py + # --output 
output/measures/measures_overall.csv + # outputs: + # moderately_sensitive: + # measure_csv: output/measures/measures_overall.csv - table_measure_rate: - run: r:v2 analysis/Table_measure_rate.R - --output output/report/*.csv - needs: [measures_overall] - outputs: - moderately_sensitive: - categorical: output/report/collate_measures_rate_table.csv + # table_measure_rate: + # run: r:v2 analysis/Table_measure_rate.R + # --output output/report/*.csv + # needs: [measures_overall] + # outputs: + # moderately_sensitive: + # categorical: output/report/collate_measures_rate_table.csv measures_by_practice: - run: ehrql:v1 generate-measures analysis/measure_practice.py - --output output/measures/measures_by_practice.csv + run: ehrql:v1 generate-measures analysis/dataset_def/measure_practice.py + --output output/highly_sensitive/measures_by_practice.csv outputs: highly_sensitive: - measure_csv: output/measures/measures_by_practice.csv + measure_csv: output/highly_sensitive/measures_by_practice.csv decile_table_figure: - run: r:v2 analysis/practice_decile.R + run: r:v2 analysis/6_practice_by_source.R needs: [measures_by_practice] outputs: moderately_sensitive: - csv: output/report/deciles/*.csv - png: output/report/deciles/*.png \ No newline at end of file + csv: output/report/source_by_practice/*.csv + png: output/report/source_by_practice/*.png + + + # visualization: + # run: r:v2 Final-visualizations.R + # needs: [table_DoD] + # outputs: + # moderately_sensitive: + # csv: output/final_visualization/*.csv + # png: output/final_visualization/*.png + +# \ No newline at end of file From 269b3e64e52897ec305fe28ca9b1d61910e1a382 Mon Sep 17 00:00:00 2001 From: Martina Pesce <82039235+marrpesce@users.noreply.github.com> Date: Wed, 28 Jan 2026 09:57:25 +0000 Subject: [PATCH 5/5] fix yaml bug --- project.yaml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/project.yaml b/project.yaml index b8a53bb..889f15c 100644 --- a/project.yaml +++ b/project.yaml @@ 
-24,10 +24,18 @@ actions: moderately_sensitive: csv: output/report/reg_dereg_ONS_TPP/*.csv + Dataset_source_dod_diff_processed: + run: r:v2 analysis/3_dataset_DoD_TPP_ONS_with_grace_period.R + --output output/highly_sensitive/DoD_TPP_ONS_with_grace_period.csv + needs: [dataset_death_TPP_ONS] + outputs: + highly_sensitive: + csv: output/highly_sensitive/DoD_TPP_ONS_with_grace_period.csv + death_source: run: r:v2 analysis/4_source_prop.R --output output/report/by_source/*.csv - needs: [dataset_death_TPP_ONS] + needs: [Dataset_source_dod_diff_processed] outputs: moderately_sensitive: csv: output/report/by_source/*.csv @@ -35,7 +43,7 @@ actions: DoD_difference: run: r:v2 analysis/5_DoD_diff.R --output output/report/DoD_diff/*.csv - needs: [dataset_death_TPP_ONS] + needs: [Dataset_source_dod_diff_processed] outputs: moderately_sensitive: csv: output/report/DoD_diff/*.csv