From daba01d108788f3772e61ef2c11de6965c0cb8ec Mon Sep 17 00:00:00 2001 From: Yamina Boukari <87201452+YaminaB@users.noreply.github.com> Date: Thu, 26 Mar 2026 15:42:14 +0000 Subject: [PATCH 1/4] update 6 cat variable --- analysis/migration_status_variables.py | 28 +++++--------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/analysis/migration_status_variables.py b/analysis/migration_status_variables.py index b232a22..1aa4f96 100644 --- a/analysis/migration_status_variables.py +++ b/analysis/migration_status_variables.py @@ -67,7 +67,7 @@ def build_mig_status_6_cat(migrant_indicators): - Likely migrant: english_not_main_language OR interpreter_required - Definite non-migrant: born_in_uk - Likely non-migrant: british_ethnicities AND no migrant code - - Unknown: none of the above + - Unknown: no migrant codes """ migrant = migrant_indicators.get("any_migrant", False) not_born_in_uk = migrant_indicators.get("not_born_in_uk", False) @@ -82,32 +82,14 @@ def build_mig_status_6_cat(migrant_indicators): highly_likely = immig_excl | refugee_asylum likely_migrant = english_not_main | interpreter_required likely_non_migrant = ((british_ethnicities) & ~migrant) + unknown = (~migrant) return case( when(not_born_in_uk).then("Definite migrant"), + when(born_in_uk).then("Definite non-migrant"), when(highly_likely).then("Highly likely migrant"), when(likely_migrant).then("Likely migrant"), - when(born_in_uk).then("Definite non-migrant"), when(likely_non_migrant).then("Likely non-migrant"), - otherwise="Unknown" + when(unknown).then("Unknown"), + otherwise="Error" ) - - # # Build the case expression in precedence order - # clauses = [ - # (not_born_in_uk, "Definite migrant"), - # (highly_likely, "Highly likely migrant"), - # (likely_migrant, "Likely migrant"), - # (likely_non_migrant, "Likely non-migrant"), - # (born_in_uk, "Definite non-migrant"), - # ] - - # # Start assembling call to case(...) 
with only the non-empty conditions - # case_args = [] - # for cond, label in clauses: - # case_args.append(when(cond).then(label)) - - # return case( - # *case_args, - # otherwise="Unknown" - # ) - From 5206571e4487b64a6ffb9386c107b6af8c418b3a Mon Sep 17 00:00:00 2001 From: Yamina Boukari <87201452+YaminaB@users.noreply.github.com> Date: Thu, 26 Mar 2026 16:23:43 +0000 Subject: [PATCH 2/4] updated codelists and removed overall annual counts code --- .../generate_annual_migrant_counts_2cat.py | 18 ++++---- .../generate_annual_migrant_counts_3cat.py | 18 ++++---- .../generate_annual_migrant_counts_6cat.py | 20 ++++---- ...l_migrant_counts_migration_status_types.py | 16 +++---- codelists/codelists.json | 46 +++++++++++-------- codelists/codelists.txt | 11 +++-- codelists/opensafely-born-outside-the-uk.csv | 24 ++++++++++ .../opensafely-english-not-main-language.csv | 4 ++ ...immigration-status-excl-refugee-asylum.csv | 15 +++++- codelists/opensafely-interpreter-required.csv | 10 ++++ codelists/opensafely-migration-status.csv | 27 ++--------- ...nsafely-trafficking-and-modern-slavery.csv | 15 ++++++ 12 files changed, 138 insertions(+), 86 deletions(-) create mode 100644 codelists/opensafely-trafficking-and-modern-slavery.csv diff --git a/analysis/generate_annual_migrant_counts_2cat.py b/analysis/generate_annual_migrant_counts_2cat.py index 29c7a76..1d6d3d8 100644 --- a/analysis/generate_annual_migrant_counts_2cat.py +++ b/analysis/generate_annual_migrant_counts_2cat.py @@ -33,14 +33,14 @@ # overall (ungrouped) measures -for label in labels: - bool_numer = (mig2_expr == label) - safe_label = label.lower().replace(" ", "_").replace("-", "_") - var_name = "mig_status_2_cat_overall" +# for label in labels: +# bool_numer = (mig2_expr == label) +# safe_label = label.lower().replace(" ", "_").replace("-", "_") +# var_name = "mig_status_2_cat_overall" - name = f"{var_name}_{safe_label}" +# name = f"{var_name}_{safe_label}" - measures.define_measure( - name=name, - 
numerator=bool_numer - ) \ No newline at end of file +# measures.define_measure( +# name=name, +# numerator=bool_numer +# ) \ No newline at end of file diff --git a/analysis/generate_annual_migrant_counts_3cat.py b/analysis/generate_annual_migrant_counts_3cat.py index 22a5fea..6e825db 100644 --- a/analysis/generate_annual_migrant_counts_3cat.py +++ b/analysis/generate_annual_migrant_counts_3cat.py @@ -33,14 +33,14 @@ # overall (ungrouped) measures -for label in labels: - bool_numer = (mig3_expr == label) - safe_label = label.lower().replace(" ", "_").replace("-", "_") - var_name = "mig_status_3_cat_overall" +# for label in labels: +# bool_numer = (mig3_expr == label) +# safe_label = label.lower().replace(" ", "_").replace("-", "_") +# var_name = "mig_status_3_cat_overall" - name = f"{var_name}_{safe_label}" +# name = f"{var_name}_{safe_label}" - measures.define_measure( - name=name, - numerator=bool_numer - ) \ No newline at end of file +# measures.define_measure( +# name=name, +# numerator=bool_numer +# ) \ No newline at end of file diff --git a/analysis/generate_annual_migrant_counts_6cat.py b/analysis/generate_annual_migrant_counts_6cat.py index 5c0a218..176a90c 100644 --- a/analysis/generate_annual_migrant_counts_6cat.py +++ b/analysis/generate_annual_migrant_counts_6cat.py @@ -38,16 +38,16 @@ name = f"{var_name}_{safe_label}_{suffix}" measures.define_measure(name=name, numerator=bool_numer, group_by=group) -# overall (ungrouped) measures +# # overall (ungrouped) measures -for label in labels: - bool_numer = (mig6_expr == label) - safe_label = label.lower().replace(" ", "_").replace("-", "_") - var_name = "mig_status_6_cat_overall" +# for label in labels: +# bool_numer = (mig6_expr == label) +# safe_label = label.lower().replace(" ", "_").replace("-", "_") +# var_name = "mig_status_6_cat_overall" - name = f"{var_name}_{safe_label}" +# name = f"{var_name}_{safe_label}" - measures.define_measure( - name=name, - numerator=bool_numer - ) \ No newline at end of file 
+# measures.define_measure( +# name=name, +# numerator=bool_numer +# ) \ No newline at end of file diff --git a/analysis/generate_annual_migrant_counts_migration_status_types.py b/analysis/generate_annual_migrant_counts_migration_status_types.py index 1f5fdae..db0b592 100644 --- a/analysis/generate_annual_migrant_counts_migration_status_types.py +++ b/analysis/generate_annual_migrant_counts_migration_status_types.py @@ -41,13 +41,13 @@ # Overall (no grouping) counts -var_name = "migration_status_types_overall" +# var_name = "migration_status_types_overall" -for key, expr in numerators_separate.items(): - safe_label = key.lower().replace(" ", "_").replace("-", "_") - name = f"{var_name}_{safe_label}" +# for key, expr in numerators_separate.items(): +# safe_label = key.lower().replace(" ", "_").replace("-", "_") +# name = f"{var_name}_{safe_label}" - measures.define_measure( - name=name, - numerator=expr - ) \ No newline at end of file +# measures.define_measure( +# name=name, +# numerator=expr +# ) \ No newline at end of file diff --git a/codelists/codelists.json b/codelists/codelists.json index 02deb34..2ca548f 100644 --- a/codelists/codelists.json +++ b/codelists/codelists.json @@ -7,10 +7,10 @@ "sha": "d95a2f826299563aa57f6fa99009e6e13a65151b" }, "opensafely-migration-status.csv": { - "id": "opensafely/migration-status/4095ab22", - "url": "https://www.opencodelists.org/codelist/opensafely/migration-status/4095ab22/", - "downloaded_at": "2026-02-05 12:52:41.927224Z", - "sha": "93be296d33160a9d041c9cce118ea00659c0fd34" + "id": "opensafely/migration-status/03277ba3", + "url": "https://www.opencodelists.org/codelist/opensafely/migration-status/03277ba3/", + "downloaded_at": "2026-03-26 16:04:16.382479Z", + "sha": "f71b5b916caff6c92326466f2ce05d040e778af5" }, "opensafely-born-in-the-uk.csv": { "id": "opensafely/born-in-the-uk/4aa36068", @@ -19,16 +19,16 @@ "sha": "43b8618968d5a6baf9ce130ea54c27b6a6aa2716" }, "opensafely-born-outside-the-uk.csv": { - "id": 
"opensafely/born-outside-the-uk/1d23465b", - "url": "https://www.opencodelists.org/codelist/opensafely/born-outside-the-uk/1d23465b/", - "downloaded_at": "2025-12-16 13:30:41.107400Z", - "sha": "667afb120ab5f45df6280eae0ee8fdfa9d955b2e" + "id": "opensafely/born-outside-the-uk/51089538", + "url": "https://www.opencodelists.org/codelist/opensafely/born-outside-the-uk/51089538/", + "downloaded_at": "2026-03-26 16:04:16.527826Z", + "sha": "9029f3a1e72f92754b2e324b871a25801bde4e61" }, "opensafely-immigration-status-excl-refugee-asylum.csv": { - "id": "opensafely/immigration-status-excl-refugee-asylum/1ca8661b", - "url": "https://www.opencodelists.org/codelist/opensafely/immigration-status-excl-refugee-asylum/1ca8661b/", - "downloaded_at": "2026-02-05 12:52:42.413609Z", - "sha": "4f98ae3177276a1c9c49094dcb7e6792c3afda3a" + "id": "opensafely/immigration-status-excl-refugee-asylum/6a18086d", + "url": "https://www.opencodelists.org/codelist/opensafely/immigration-status-excl-refugee-asylum/6a18086d/", + "downloaded_at": "2026-03-26 16:04:16.597822Z", + "sha": "74bb584b67e52636941b4a3578b0a1a5a58790b2" }, "opensafely-asylum-or-refugee-status.csv": { "id": "opensafely/asylum-or-refugee-status/209cb9e0", @@ -37,22 +37,28 @@ "sha": "ecd46bc32ff596afcb298dc249588fbe0efc907b" }, "opensafely-english-not-main-language.csv": { - "id": "opensafely/english-not-main-language/5c450c53", - "url": "https://www.opencodelists.org/codelist/opensafely/english-not-main-language/5c450c53/", - "downloaded_at": "2026-02-05 12:52:43.216448Z", - "sha": "6996e6a8c345d281ea69d4e0c2cd28e1a34dcfe9" + "id": "opensafely/english-not-main-language/0eb92f0a", + "url": "https://www.opencodelists.org/codelist/opensafely/english-not-main-language/0eb92f0a/", + "downloaded_at": "2026-03-26 16:04:16.757432Z", + "sha": "881fb0517a7d48aedd6e458dbe1f11cea53758fe" }, "opensafely-interpreter-required.csv": { - "id": "opensafely/interpreter-required/556e6040", - "url": 
"https://www.opencodelists.org/codelist/opensafely/interpreter-required/556e6040/", - "downloaded_at": "2025-12-16 13:47:00.356076Z", - "sha": "a40c90d5145744c3c6e447a818c10b2e3aca3a75" + "id": "opensafely/interpreter-required/5734c8b3", + "url": "https://www.opencodelists.org/codelist/opensafely/interpreter-required/5734c8b3/", + "downloaded_at": "2026-03-26 16:04:16.837247Z", + "sha": "9d4dafb36cf2896c418e1cff63bae31769e3a797" }, "opensafely-british-ethnicities.csv": { "id": "opensafely/british-ethnicities/6a897fb0", "url": "https://www.opencodelists.org/codelist/opensafely/british-ethnicities/6a897fb0/", "downloaded_at": "2026-03-05 15:35:19.516161Z", "sha": "b29fb0154f75840482e61f4a66d62af208b31529" + }, + "opensafely-trafficking-and-modern-slavery.csv": { + "id": "opensafely/trafficking-and-modern-slavery/0074bbad", + "url": "https://www.opencodelists.org/codelist/opensafely/trafficking-and-modern-slavery/0074bbad/", + "downloaded_at": "2026-03-26 16:04:16.998062Z", + "sha": "c315d06d1b78dd3b77630e6b38a700338b763b45" } } } \ No newline at end of file diff --git a/codelists/codelists.txt b/codelists/codelists.txt index 40ecc5f..0531b84 100644 --- a/codelists/codelists.txt +++ b/codelists/codelists.txt @@ -1,9 +1,10 @@ opensafely/ethnicity-snomed-0removed/22911876/ -opensafely/migration-status/4095ab22/ +opensafely/migration-status/03277ba3/ opensafely/born-in-the-uk/4aa36068/ -opensafely/born-outside-the-uk/1d23465b/ -opensafely/immigration-status-excl-refugee-asylum/1ca8661b/ +opensafely/born-outside-the-uk/51089538/ +opensafely/immigration-status-excl-refugee-asylum/6a18086d/ opensafely/asylum-or-refugee-status/209cb9e0/ -opensafely/english-not-main-language/5c450c53/ -opensafely/interpreter-required/556e6040/ +opensafely/english-not-main-language/0eb92f0a/ +opensafely/interpreter-required/5734c8b3/ opensafely/british-ethnicities/6a897fb0/ +opensafely/trafficking-and-modern-slavery/0074bbad \ No newline at end of file diff --git 
a/codelists/opensafely-born-outside-the-uk.csv b/codelists/opensafely-born-outside-the-uk.csv index f36f343..344e8b6 100644 --- a/codelists/opensafely-born-outside-the-uk.csv +++ b/codelists/opensafely-born-outside-the-uk.csv @@ -1,5 +1,6 @@ code,term 1193634005,Born in Cabo Verde +138451002,Country of birth (European) 138452009,Born in Albania 138453004,Born in Andorra 138454005,Born in Austria @@ -44,6 +45,7 @@ code,term 138497000,Born in Ukraine 138498005,Born in Vatican City 138500006,Born in Yugoslavia +138501005,Country of birth (Asian) 138502003,Born in Afghanistan 138503008,Born in Armenia 138504002,Born in Bahrain @@ -96,6 +98,7 @@ code,term 138551000,Born in Uzbekistan 138552007,Born in Vietnam 138553002,Born in Yemen +138554008,Country of birth (American) 138555009,Born in Argentina 138556005,Born in Belize 138557001,Born in Bolivia @@ -120,6 +123,7 @@ code,term 138577008,Born in USA 138578003,Born in Uruguay 138579006,Born in Venezuela +138580009,Country of birth (African) 138581008,Born in Algeria 138582001,Born in Angola 138583006,Born in Benin @@ -171,8 +175,10 @@ code,term 138629000,Born in Zaire 138630005,Born in Zambia 138631009,Born in Zimbabwe +138632002,Country of birth (Australasian) 138633007,Born in Australia 138634001,Born in New Zealand +138635000,Country of birth (Atlantic) 138636004,Born in Antigua and Barbuda 138637008,Born in Bahamas 138638003,Born in Barbados @@ -186,6 +192,7 @@ code,term 138646002,Born in St. 
Vincent 138647006,Born in Togo 138648001,Born in Trinidad and Tobago +138649009,Country of birth (Pacific) 138650009,Born in Fiji 138651008,Born in Kiribati 138652001,Born in Nauru @@ -196,6 +203,7 @@ code,term 138657007,Born in Tuvala 138658002,Born in Vanuatu 138659005,Born in Western Samoa +161180000,Country of birth (European) 161181001,Born in Albania 161182008,Born in Andorra 161183003,Born in Austria @@ -240,6 +248,7 @@ code,term 161225003,Born in Ukraine 161226002,Born in Vatican City 161228001,Born in Yugoslavia +161229009,Country of birth (Asian) 161230004,Born in Afghanistan 161231000,Born in Armenia 161232007,Born in Bahrain @@ -292,6 +301,7 @@ code,term 161280005,Born in Uzbekistan 161281009,Born in Vietnam 161282002,Born in Yemen +161283007,Country of birth (American) 161284001,Born in Argentina 161285000,Born in Belize 161286004,Born in Bolivia @@ -316,6 +326,7 @@ code,term 161305004,Born in USA 161306003,Born in Uruguay 161307007,Born in Venezuela +161308002,Country of birth (African) 161309005,Born in Algeria 161310000,Born in Angola 161311001,Born in Benin @@ -367,8 +378,10 @@ code,term 161358009,Born in Zaire 161359001,Born in Zambia 161360006,Born in Zimbabwe +161361005,Country of birth (Australasian) 161362003,Born in Australia 161363008,Born in New Zealand +161364002,Country of birth (Atlantic) 161365001,Born in Antigua and Barbuda 161366000,Born in Bahamas 161367009,Born in Barbados @@ -382,6 +395,7 @@ code,term 161375003,Born in St. 
Vincent 161376002,Born in Togo 161377006,Born in Trinidad and Tobago +161378001,Country of birth (Pacific) 161379009,Born in Fiji 161380007,Born in Kiribati 161381006,Born in Nauru @@ -395,6 +409,14 @@ code,term 205091000000107,Born in Eritrea 206981000000101,Born in Eritrea 206991000000104,Born in Eritrea +2816171000000106,Born in South Sudan +315355003,Country of birth - European +315356002,Country of birth - Asian +315357006,Country of birth - American continent +315358001,Country of birth - African +315359009,Country of birth - Australasian +315360004,Country of birth - Atlantic +315361000,Country of birth - Pacific 315365009,Born in Afghanistan 315366005,Born in Albania 315367001,Born in Algeria @@ -609,6 +631,7 @@ code,term 345801000000108,Born in Martinique 345811000000105,Born in Martinique 345821000000104,Born in Martinique +413330005,Victim of oppression in country of origin 503061000000107,Born in Anguilla 503071000000100,Born in Anguilla 503081000000103,Born in Anguilla @@ -622,6 +645,7 @@ code,term 750491000000109,Born in Democratic Republic of Congo 768761000000104,Born in Aruba 768771000000106,Born in Aruba +841311000000103,Country of origin high risk for blood-borne virus 858651000000103,Born in Montenegro 858661000000100,Born in Montenegro 918951000000105,Born in Samoa diff --git a/codelists/opensafely-english-not-main-language.csv b/codelists/opensafely-english-not-main-language.csv index 8f1a4f6..aa618df 100644 --- a/codelists/opensafely-english-not-main-language.csv +++ b/codelists/opensafely-english-not-main-language.csv @@ -2,6 +2,7 @@ code,term 1036381000000101,Main spoken language Romany 1036391000000104,Main spoken language Romanes 1047281000000107,Does not speak English +1047291000000109,Does not speak English 107751000000106,Main spoken language Pashto 1264278006,Iranian Persian language 1300106003,Completed English as a second language program @@ -42,6 +43,7 @@ code,term 161144000,Pashtu language 161145004,Punjabi language 161146003,Urdu 
language +161148002,Speaks English poorly 161172007,World languages 161173002,Vietnamese language 161176005,Creole language @@ -1103,6 +1105,7 @@ code,term 621391000124107,Referral to English as a second language program 651321000124106,Education about English as a second language program 661891000124107,Assistance with application for English as a second language program +662331000124108,Evaluation of eligibility for English as a second language program 698651000,Main spoken language Abkhazian 698652007,Main spoken language Afar 698653002,Main spoken language Afrikaans @@ -1359,6 +1362,7 @@ code,term 85621000000107,Main spoken language Vietnamese 85921000000103,Main spoken language Korean 85931000000101,Main spoken language Igbo +910241000000102,First language not English 910251000000104,First language not English 945161000000108,Difficulty understanding verbal language 945211000000103,Does not understand verbal language diff --git a/codelists/opensafely-immigration-status-excl-refugee-asylum.csv b/codelists/opensafely-immigration-status-excl-refugee-asylum.csv index ce7cd4e..cf50c4c 100644 --- a/codelists/opensafely-immigration-status-excl-refugee-asylum.csv +++ b/codelists/opensafely-immigration-status-excl-refugee-asylum.csv @@ -1,4 +1,5 @@ code,term +103738006,"History and physical examination, immigration" 1085811000000100,History of detention in immigration removal centre 1352008001,Residence permit for foreign citizen status 1364151000000105,Immigration Removal Centre Assessment Toolkit discharge planning screening @@ -9,15 +10,27 @@ code,term 1364271000000108,Immigration Removal Centre Assessment Toolkit secondary screening declined 1364871000000107,Immigration Removal Centre Assessment Toolkit discharge screening 1364881000000109,Immigration Removal Centre Assessment Toolkit discharge screening declined +137906009,Immigrant +138090008,Social migrant +138429005,Immigrant +148586001,Immigration examination +160509003,Immigrant +160701002,Social migrant 
+160702009,Illegal migrant +161158003,Immigrant 185711000000100,Family reunion immigrant +1874641000000107,Initial health assessment using New Patient Questionnaire for newly arrived migrants in the United Kingdom declined +1874651000000105,Initial health assessment using New Patient Questionnaire for newly arrived migrants in the United Kingdom 189151000000100,Family reunion immigrant 198311000000106,Overseas visitor 213261000000106,Overseas visitor 213271000000104,Overseas visitor -306211000000109,Failed asylum seeker +224619008,Migrant +336351000000106,Migrant worker - National Public Health Classification 34051000087100,Citizenship status 416625007,Family reunion immigrant 450768005,International student +720301000000108,Migrant worker 728641000000104,Person granted indefinite leave to remain in United Kingdom 781031000000101,International student 811031000000102,Has United Kingdom student visa diff --git a/codelists/opensafely-interpreter-required.csv b/codelists/opensafely-interpreter-required.csv index dd1c4e0..a19c115 100644 --- a/codelists/opensafely-interpreter-required.csv +++ b/codelists/opensafely-interpreter-required.csv @@ -1,6 +1,7 @@ code,term 1047321000000104,Romany language interpreter needed 1050791000000101,Romany language interpreter needed +1254706008,Referral to language interpretation service 1254713008,Requires language interpretation service to support health literacy 1366478006,Requires spoken language interpretation service 1366479003,Requires in-person spoken language interpretation service @@ -168,7 +169,12 @@ code,term 211561000000105,Interpreter needed - Pashto 211641000000108,Interpreter needed - Polish 211671000000102,Interpreter needed - Polish +2672691000000101,Dari language interpreter needed +276161000000101,Interpreter not available +276171000000108,Interpreter not available +276181000000105,Interpreter not available 303601000000100,Telephone interpreting service used +314430004,Presence of interpreter 314431000,Interpreter 
present 315593009,Need for interpreter 315594003,Interpreter needed @@ -247,6 +253,8 @@ code,term 360291000000104,Javanese language interpreter needed 360301000000100,Javanese language interpreter needed 360311000000103,Inuktitut language interpreter needed +360321000000109,Inuktitut language iterpreter needed +360331000000106,Inuktitut language iterpreter needed 360341000000102,Interlingue language interpreter needed 360351000000104,Interlingue language interpreter needed 360361000000101,Interlingue language interpreter needed @@ -439,9 +447,11 @@ code,term 370271000000107,Interpreter needed 413309001,No adult family member literate in English 423785008,Provision of interpreter/translator services +426201006,Interpreter not available 445075008,Request for language interpreter service 521521000000104,Request for language interpreter service 621391000124107,Referral to English as a second language program +662851000124106,Referral to interpreter 736790000,Interpreter booked 745664000,Requires telephone language interpreter service 787661000000108,Interpreter booked diff --git a/codelists/opensafely-migration-status.csv b/codelists/opensafely-migration-status.csv index 0717a81..a8fc165 100644 --- a/codelists/opensafely-migration-status.csv +++ b/codelists/opensafely-migration-status.csv @@ -38,8 +38,6 @@ code,term 1366478006,Requires spoken language interpretation service 1366479003,Requires in-person spoken language interpretation service 137906009,Immigrant -137910007,Country of origin -137921003,Country of origin NOS 138090008,Social migrant 138429005,Immigrant 138443000,World languages @@ -76,14 +74,12 @@ code,term 138480006,Born in Malta 138481005,Born in Moldavia 138482003,Born in Monaco -138483008,Born in Northern Ireland 138484002,Born in Norway 138485001,Born in Poland 138486000,Born in Portugal 138487009,Born in Republic of Ireland 138488004,Born in Romania 138489007,Born in San Marino -138490003,Born in Scotland 138491004,Born in Slovakia 138492006,Born in 
Slovenia 138493001,Born in Spain @@ -280,7 +276,6 @@ code,term 153703000,Need for interpreter 153704006,Interpreter needed 160509003,Immigrant -160537005,Country of origin NOS 160701002,Social migrant 160702009,Illegal migrant 161141008,Bengali language @@ -326,14 +321,12 @@ code,term 161208000,Born in Malta 161209008,Born in Moldavia 161210003,Born in Monaco -161211004,Born in Northern Ireland 161212006,Born in Norway 161213001,Born in Poland 161214007,Born in Portugal 161215008,Born in Republic of Ireland 161216009,Born in Romania 161217000,Born in San Marino -161218005,Born in Scotland 161219002,Born in Slovakia 161220008,Born in Slovenia 161221007,Born in Spain @@ -629,8 +622,6 @@ code,term 209341000000102,Interpreter needed - Urdu 209351000000104,Interpreter needed - Vietnamese 209361000000101,Interpreter needed - Vietnamese -209371000000108,Interpreter needed - Welsh -209381000000105,Interpreter needed - Welsh 209391000000107,Interpreter needed - Yoruba 209401000000105,Interpreter needed - Yoruba 210781000000104,Interpreter needed - Akan @@ -697,22 +688,16 @@ code,term 211561000000105,Interpreter needed - Pashto 211641000000108,Interpreter needed - Polish 211671000000102,Interpreter needed - Polish -212061000000105,Akan as a second language -212071000000103,Akan as a second language -212081000000101,Albanian as a second language -212091000000104,Albanian as a second language -212101000000107,Amharic as a second language -212111000000109,Amharic as a second language -212121000000103,Arabic as a second language -212131000000101,Arabic as a second language 212241000000100,English as a second language 212251000000102,English as a second language 213261000000106,Overseas visitor 213271000000104,Overseas visitor 224619008,Migrant 224628009,Hinko language +2672691000000101,Dari language interpreter needed 276171000000108,Interpreter not available 276181000000105,Interpreter not available +2816171000000106,Born in South Sudan 297289008,World languages 
297290004,Afro-Asiatic language 297291000,Berber language @@ -1578,7 +1563,6 @@ code,term 315590007,Born in Yemen 315593009,Need for interpreter 315594003,Interpreter needed -331671000000104,Migration - National Public Health Classification 336351000000106,Migrant worker - National Public Health Classification 341651000000107,Born in former Yugoslav Republic of Macedonia 341661000000105,Born in former Yugoslav Republic of Macedonia @@ -2126,7 +2110,6 @@ code,term 370261000000100,Interpreter needed 370271000000107,Interpreter needed 370721000000100,Main spoken language Bulgarian -377051000000109,Former Yugoslav Republic of Macedonia 389297006,Refugee 389298001,Asylum seeker 390033009,Asylum seeker @@ -2161,7 +2144,6 @@ code,term 408519004,Main spoken language Thai 408520005,Main spoken language Flemish 408521009,Main spoken language French Créole -408522002,Main spoken language Gaelic 408523007,Main spoken language Hakka 408524001,Main spoken language Hebrew 408525000,Main spoken language Akan @@ -2175,6 +2157,7 @@ code,term 408534005,Main spoken language Patois 408535006,Main spoken language Serbian 413323004,Refugee family +413330005,Victim of oppression in country of origin 414640006,Main spoken language Finnish 416625007,Family reunion immigrant 423785008,Provision of interpreter/translator services @@ -2256,7 +2239,6 @@ code,term 523561000000106,Born in Serbia 523571000000104,Born in Serbia 554851000005102,Asylum seekers center -568541000000108,Country of origin NOS 609092003,Main spoken language Bamun 609093008,Main spoken language Dari 609094002,Main spoken language Konkani @@ -2504,7 +2486,6 @@ code,term 708892009,Hmong language 718512007,Main spoken language Romany 720301000000108,Migrant worker -723981000000108,Migration 728611000000100,Asylum seeker awaiting decision on refugee status 728621000000106,Asylum seeker with application for asylum refused 728631000000108,Asylum seeker with humanitarian protection status @@ -2738,7 +2719,6 @@ code,term 
972591000000100,Chinese interpreter needed 972601000000106,Church Slavic interpreter needed 972611000000108,Chuvash interpreter needed -972621000000102,Cornish interpreter needed 972631000000100,Cree interpreter needed 972641000000109,Dhivehi interpreter needed 972651000000107,Ewe interpreter needed @@ -2789,7 +2769,6 @@ code,term 974871000000109,Interpreter needed for language N - S 974881000000106,Interpreter needed for language G - M 974891000000108,Interpreter needed for language A - F -991491000000100,Country of birth findings simple reference set 99471000000105,Main spoken language Italian 99481000000107,Main spoken language German 99491000000109,Main spoken language Albanian diff --git a/codelists/opensafely-trafficking-and-modern-slavery.csv b/codelists/opensafely-trafficking-and-modern-slavery.csv new file mode 100644 index 0000000..affff5a --- /dev/null +++ b/codelists/opensafely-trafficking-and-modern-slavery.csv @@ -0,0 +1,15 @@ +code,term +1017202000,At increased risk of human trafficking +1045691000000103,Victim of modern slavery +1045701000000103,Victim of modern slavery +1045751000000102,Victim of domestic servitude +1045861000000108,At risk of human trafficking +1045981000000106,At risk of slavery +1050391000000102,At risk of human trafficking +1050481000000109,At risk of slavery +16290721000119109,Victim of human trafficking in adolescence +16290761000119104,Victim of human trafficking in adulthood +16290801000119107,Victim of human trafficking in childhood +734998001,Victim of human trafficking +863561000000103,Victim of human trafficking +875581000000101,Victim of human trafficking From 17155b8baf14494f08a814b138cd4006a85180cb Mon Sep 17 00:00:00 2001 From: Yamina Boukari <87201452+YaminaB@users.noreply.github.com> Date: Wed, 1 Apr 2026 13:10:29 +0000 Subject: [PATCH 3/4] added trafficking codelist, updated migration status variables, created date of entry cohort --- analysis/codelists.py | 2 + ...dataset_definition_date_of_entry_cohort.py | 182 
++++++++++++++++++ analysis/migration_status_variables.py | 6 +- ...tion_migration_code_before_birth_cohort.py | 0 project.yaml | 14 +- 5 files changed, 191 insertions(+), 13 deletions(-) create mode 100644 analysis/dataset_definition_date_of_entry_cohort.py rename analysis/{ => scrapyard}/dataset_definition_migration_code_before_birth_cohort.py (100%) diff --git a/analysis/codelists.py b/analysis/codelists.py index cc35576..6642645 100644 --- a/analysis/codelists.py +++ b/analysis/codelists.py @@ -19,6 +19,8 @@ interpreter_migrant_codes = codelist_from_csv("codelists/opensafely-interpreter-required.csv", column="code") +trafficking_codes = codelist_from_csv("codelists/opensafely-trafficking-and-modern-slavery.csv", column="code") + british_ethnicities_codes = codelist_from_csv("codelists/opensafely-british-ethnicities.csv", column="code") ethnicity_16_level_codelist = codelist_from_csv( diff --git a/analysis/dataset_definition_date_of_entry_cohort.py b/analysis/dataset_definition_date_of_entry_cohort.py new file mode 100644 index 0000000..582da27 --- /dev/null +++ b/analysis/dataset_definition_date_of_entry_cohort.py @@ -0,0 +1,182 @@ +# ############################################################################# +# Number of migrants in OpenSAFELY-TPP from 2009-2025 +# - Author: Yamina Boukari +# - Bennett Institute for Applied Data Science, University of Oxford, 2025 +############################################################################# + +# This is a script to create a cohort of people who: +# 1) have a date of UK entry code +# 2) were registered at any time (2009-2025) +# 3) had a first registration that was between their birth and death date +# 4) do not have a disclosive sex AND +# 5) did not die before or on 1st Jan 2009 (study start) +# 6) had a plausible age at the beginning of the study period (i.e. 
not >110 years old in 2009)
+
+from pathlib import Path
+
+from ehrql import create_dataset, codelist_from_csv, show, case, when, days
+from ehrql.tables.tpp import addresses, patients, practice_registrations, clinical_events, ons_deaths
+import codelists
+import migration_status_variables
+
+# Dates (ISO strings; used as inclusive bounds by the is_on_or_* comparisons below)
+
+study_start_date = "2009-01-01"
+study_end_date = "2025-12-31"
+
+date_of_first_practice_registration = (
+    practice_registrations.sort_by(practice_registrations.start_date)
+    .first_for_patient().start_date
+)
+
+end_date_of_latest_practice_registration = (
+    practice_registrations.sort_by(practice_registrations.end_date)
+    .last_for_patient().end_date  # NOTE(review): ehrql sorts NULL first, so an ongoing (NULL end_date) registration is presumably masked by any closed one - confirm
+)
+
+is_registered_at_any_time_during_study = (
+    # starts during period
+    date_of_first_practice_registration.is_on_or_between(
+        study_start_date,
+        study_end_date,
+    )
+    # ending during period
+    | end_date_of_latest_practice_registration.is_on_or_between(
+        study_start_date,
+        study_end_date,
+    )
+    # starting before and ending after (or ongoing)
+    | (
+        date_of_first_practice_registration.is_on_or_before(study_start_date)
+        & (
+            end_date_of_latest_practice_registration.is_on_or_after(study_end_date)
+            | end_date_of_latest_practice_registration.is_null()
+        )
+    )
+)
+
+has_first_registration_between_birth_and_death = (
+    # between dob and date of death
+    (date_of_first_practice_registration.is_on_or_between(patients.date_of_birth, patients.date_of_death)) |
+    # after dob and date of death is null (still alive)
+    (date_of_first_practice_registration.is_on_or_after(patients.date_of_birth) & patients.date_of_death.is_null())
+)
+
+
+has_non_disclosive_sex = (
+    (patients.sex == "male") | (patients.sex == "female")
+)
+
+did_not_die_before_study_start = (
+    ((patients.date_of_death > study_start_date) | (patients.date_of_death.is_null())) &  # death date on the patients table
+    ((ons_deaths.date > study_start_date) | (ons_deaths.date.is_null()))  # death date on the ons_deaths table; both sources must agree
+)
+
+was_not_over_110_at_study_start_or_less_than_0_at_end_date = (
+    (patients.age_on(study_start_date) <= 110) & (patients.age_on(study_end_date) >= 0)  # must be `&`: joined with `|`, every patient with a known date of birth passes
+)
+
+# has date of UK entry code (single SNOMED CT code, hard-coded here rather than read from the codelists module)
+
+date_of_entry_code = ["860021000000109"]
+
+has_date_of_uk_entry = (
+    clinical_events
+    .where(clinical_events.snomedct_code.is_in(date_of_entry_code))
+    .where(clinical_events.date.is_on_or_between(patients.date_of_birth, study_end_date))  # ignore codes dated before birth or after study end
+    .where((clinical_events.date.is_on_or_before(patients.date_of_death)) | (patients.date_of_death.is_null()))  # ignore codes dated after death
+    .exists_for_patient()
+)
+
+
+dataset = create_dataset()
+dataset.define_population(has_date_of_uk_entry &
+    is_registered_at_any_time_during_study &
+    has_first_registration_between_birth_and_death &
+    has_non_disclosive_sex &
+    did_not_die_before_study_start &
+    was_not_over_110_at_study_start_or_less_than_0_at_end_date)
+
+# add variables
+
+## year of birth and date of birth
+year_of_birth = (patients.date_of_birth).year
+dataset.year_of_birth = year_of_birth
+
+dataset.year_of_birth_band = case(  # no `otherwise`: years outside 1900-2025 match no branch
+    when((year_of_birth >= 1900) & (year_of_birth <= 1925)).then("1900-1925"),
+    when((year_of_birth > 1925) & (year_of_birth <= 1945)).then("1926-1945"),
+    when((year_of_birth > 1945) & (year_of_birth <= 1965)).then("1946-1965"),
+    when((year_of_birth > 1965) & (year_of_birth <= 1985)).then("1966-1985"),
+    when((year_of_birth > 1985) & (year_of_birth <= 2005)).then("1986-2005"),
+    when((year_of_birth > 2005) & (year_of_birth <= 2025)).then("2006-2025")
+)
+
+dataset.date_of_birth = patients.date_of_birth
+
+## sex
+
+dataset.sex = patients.sex
+
+## ethnicity (most recent ethnicity code recorded on or before the study end date)
+
+latest_ethnicity_code = (
+    clinical_events.where(clinical_events.snomedct_code.is_in(codelists.ethnicity_16_level_codelist))
+    .where(clinical_events.date.is_on_or_before(study_end_date))
+    .sort_by(clinical_events.date)
+    .last_for_patient()
+    .snomedct_code)
+dataset.latest_ethnicity_code = latest_ethnicity_code
+
+latest_ethnicity_16_level_group = latest_ethnicity_code.to_category(
+    codelists.ethnicity_16_level_codelist)
+dataset.latest_ethnicity_16_level_group = latest_ethnicity_16_level_group
+
+latest_ethnicity_6_level_group = latest_ethnicity_code.to_category(
+    codelists.ethnicity_6_level_codelist)
+dataset.latest_ethnicity_6_level_group = latest_ethnicity_6_level_group
+
+## practice region (from the registration with the latest start date; not restricted to the study period)
+
+dataset.region = (practice_registrations
+    .sort_by(practice_registrations.start_date)
+    .last_for_patient()
+    .practice_nuts1_region_name)
+
+## imd (from the address with the latest start date)
+
+address = (addresses
+    .sort_by(addresses.start_date)
+    .last_for_patient())
+
+dataset.imd_decile = address.imd_decile
+dataset.imd_quintile = address.imd_quintile
+
+## date of first practice registration
+
+dataset.date_of_first_practice_registration = date_of_first_practice_registration
+
+dataset.date_of_death = patients.date_of_death
+
+# migration status
+
+migrant_indicators = migration_status_variables.build_migrant_indicators(study_end_date)
+
+for name, indicator in migrant_indicators.items():
+    setattr(dataset, name, indicator)  # one dataset column per indicator, named by its dict key
+
+## consolidate migration indicators into 2-cat, 3-cat and 6-cat variables
+
+dataset.mig_status_2_cat = migration_status_variables.build_mig_status_2_cat(migrant_indicators)
+
+dataset.mig_status_3_cat = migration_status_variables.build_mig_status_3_cat(
+    migrant_indicators)
+
+dataset.mig_status_6_cat = migration_status_variables.build_mig_status_6_cat(
+    migrant_indicators
+)
+
+dataset.configure_dummy_data(population_size=1000)
+show(dataset)
+
+
diff --git a/analysis/migration_status_variables.py b/analysis/migration_status_variables.py
index 1aa4f96..f6cdc38 100644
--- a/analysis/migration_status_variables.py
+++ b/analysis/migration_status_variables.py
@@ -11,6 +11,7 @@
     "refugee_asylum_status": codelists.asylum_refugee_migrant_codes,
     "english_not_main_language": codelists.english_not_main_language_excl_interpreter_migrant_codes,
     "interpreter_required": codelists.interpreter_migrant_codes,
+    "trafficking": codelists.trafficking_codes,
     "british_ethnicities": codelists.british_ethnicities_codes
 }
 
@@
-64,7 +65,7 @@ def build_mig_status_6_cat(migrant_indicators): 6-category migrant status (priority order): - Definite migrant: not_born_in_uk - Highly likely migrant: immig_status_excl_refugee_asylum OR refugee_asylum_status - - Likely migrant: english_not_main_language OR interpreter_required + - Likely migrant: english_not_main_language OR interpreter_required OR trafficking - Definite non-migrant: born_in_uk - Likely non-migrant: british_ethnicities AND no migrant code - Unknown: no migrant codes @@ -75,12 +76,13 @@ def build_mig_status_6_cat(migrant_indicators): refugee_asylum = migrant_indicators.get("refugee_asylum_status", False) english_not_main = migrant_indicators.get("english_not_main_language", False) interpreter_required = migrant_indicators.get("interpreter_required", False) + trafficking = migrant_indicators.get("trafficking", False) born_in_uk = migrant_indicators.get("born_in_uk", False) british_ethnicities = migrant_indicators.get("british_ethnicities", False) # Compose combined conditions highly_likely = immig_excl | refugee_asylum - likely_migrant = english_not_main | interpreter_required + likely_migrant = english_not_main | interpreter_required | trafficking likely_non_migrant = ((british_ethnicities) & ~migrant) unknown = (~migrant) diff --git a/analysis/dataset_definition_migration_code_before_birth_cohort.py b/analysis/scrapyard/dataset_definition_migration_code_before_birth_cohort.py similarity index 100% rename from analysis/dataset_definition_migration_code_before_birth_cohort.py rename to analysis/scrapyard/dataset_definition_migration_code_before_birth_cohort.py diff --git a/project.yaml b/project.yaml index 53d9f22..15afe83 100644 --- a/project.yaml +++ b/project.yaml @@ -25,11 +25,11 @@ actions: highly_sensitive: dataset: output/cohorts/census_2011_study_cohort.arrow - generate_pre_birth_migration_code_cohort: - run: ehrql:v1 generate-dataset analysis/dataset_definition_migration_code_before_birth_cohort.py --output 
output/cohorts/migration_code_before_birth_cohort.arrow + generate_date_of_entry_cohort: + run: ehrql:v1 generate-dataset analysis/dataset_definition_date_of_entry_cohort.py --output output/cohorts/date_of_entry_cohort.arrow outputs: highly_sensitive: - dataset: output/cohorts/migration_code_before_birth_cohort.arrow + dataset: output/cohorts/date_of_entry_cohort.arrow generate_demographics_full_study_table_2cat: run: r:latest analysis/process_full_cohort_data.R output/tables/demographics_full_study_cohort_2cat.csv mig_status_2_cat @@ -157,14 +157,6 @@ actions: moderately_sensitive: csv: output/tables/migration_code_combinations_summary.csv - generate_migration_code_before_birth_combinations_summary: - run: r:latest analysis/migration_code_before_birth_combinations.R - needs: - - generate_pre_birth_migration_code_cohort - outputs: - moderately_sensitive: - csv: output/tables/migration_code_combinations_before_birth_summary.csv - generate_date_variable_checks_summary: run: r:latest analysis/date_variable_checks.R needs: From f8aa67885822da37502614796652d401feea96fe Mon Sep 17 00:00:00 2001 From: Yamina Boukari <87201452+YaminaB@users.noreply.github.com> Date: Thu, 2 Apr 2026 11:05:45 +0000 Subject: [PATCH 4/4] Added yaml heading for full cohort migration types and uk entry cohort plus processing scripts --- analysis/process_date_of_uk_entry_cohort.R | 89 +++++++++++++++++ ...process_full_cohort_data_migration_types.R | 99 +++++++++++++++++++ project.yaml | 16 +++ 3 files changed, 204 insertions(+) create mode 100644 analysis/process_date_of_uk_entry_cohort.R create mode 100644 analysis/process_full_cohort_data_migration_types.R diff --git a/analysis/process_date_of_uk_entry_cohort.R b/analysis/process_date_of_uk_entry_cohort.R new file mode 100644 index 0000000..33f7726 --- /dev/null +++ b/analysis/process_date_of_uk_entry_cohort.R @@ -0,0 +1,89 @@ +################################################### +# This script creates descriptive demographic tables for the 
date of UK entry cohort
+#
+# Author: Yamina Boukari
+# Bennett Institute for Applied Data Science
+# University of Oxford, 2026
+#
+###################################################
+
+library(tidyverse)
+library(lubridate)
+library(here)
+library(arrow)
+library(skimr)
+library(fs)
+
+## Create output directory
+output_dir <- here::here("output", "tables")
+fs::dir_create(output_dir)
+
+cohort_file <- "output/cohorts/date_of_entry_cohort.arrow"
+output_file <- "output/tables/demographics_date_of_uk_entry_cohort.csv"
+
+# Capture command-line arguments (printed for the log; not otherwise used below)
+args <- commandArgs(trailingOnly=TRUE)
+print(commandArgs(trailingOnly=TRUE))
+
+
+# Import data ----
+cohort <- read_feather(cohort_file) %>%
+  mutate(
+    across(
+      where(is.ordered),
+      ~ factor(as.character(.x))  # turn ordered factors into plain (unordered) factors
+    )
+  )
+
+# Summarize -----
+
+vars_to_summarise <- c(
+  "year_of_birth_band",
+  "sex",
+  "region",
+  "latest_ethnicity_6_level_group",
+  "imd_quintile"
+)
+
+
+rounding <- function(vars) {
+  case_when(vars == 0 ~ 0,  # zero stays zero
+            vars > 7 ~ round(vars / 5) * 5)  # counts above 7 go to the nearest 5; 1-7 match no branch and become NA
+}
+
+table_freq_overall <- cohort %>%
+  group_by(any_migrant) %>%
+  summarise(
+    n = rounding(n()),  # rows in this any_migrant group; nrow(cohort) would repeat the whole-cohort total in every group
+    percentage = 100) %>%
+  mutate(
+    subgroup = "All",
+    category = "All"
+  )
+
+table_freq <- cohort %>%
+  pivot_longer(
+    cols = all_of(vars_to_summarise),
+    names_to = "subgroup",
+    values_to = "category"
+  ) %>%
+  group_by(any_migrant, subgroup
+  ) %>%
+  count(
+    category,
+    name = "n"
+  ) %>%
+  mutate(
+    category = fct_explicit_na(category, "unknown"),
+    n = rounding(n),
+    percentage = round((100 * n / sum(n, na.rm = TRUE)),1)  # share within each any_migrant x subgroup, computed from the rounded counts
+  ) %>%
+  ungroup()
+
+table_freq <- bind_rows(table_freq_overall, table_freq)
+table_freq <- table_freq %>%
+  relocate(n, .before= percentage)
+
+dir_create(path_dir(output_file))
+write_csv(table_freq, output_file)
+
diff --git a/analysis/process_full_cohort_data_migration_types.R b/analysis/process_full_cohort_data_migration_types.R
new file mode 100644
index 0000000..b475d33
--- /dev/null
+++
b/analysis/process_full_cohort_data_migration_types.R
@@ -0,0 +1,99 @@
+###################################################
+# This script creates descriptive demographic tables for the overall full cohort by migration code type
+#
+# Author: Yamina Boukari
+# Bennett Institute for Applied Data Science
+# University of Oxford, 2026
+#
+###################################################
+
+library(tidyverse)
+library(lubridate)
+library(here)
+library(arrow)
+library(skimr)
+library(fs)
+
+## Create output directory
+output_dir <- here::here("output", "tables")
+fs::dir_create(output_dir)
+
+cohort_file <- "output/cohorts/full_study_cohort.arrow"
+output_file <- "output/tables/demographics_full_study_cohort_migration_types.csv"
+
+# Import data ----
+cohort <- read_feather(cohort_file) %>%
+  mutate(
+    across(
+      where(is.ordered),
+      ~ factor(as.character(.x))  # turn ordered factors into plain (unordered) factors
+    )
+  )
+
+migration_type_vars <- c("any_migrant",  # indicator columns; each is summarised over the rows where it is TRUE
+                         "not_born_in_uk",
+                         "immig_status_excl_refugee_asylum",
+                         "refugee_asylum_status",
+                         "english_not_main_language",
+                         "interpreter_required",
+                         "trafficking",
+                         "british_ethnicities",
+                         "born_in_uk")
+vars_to_summarise <- c(
+  "year_of_birth_band",
+  "sex",
+  "region",
+  "latest_ethnicity_6_level_group",
+  "imd_quintile"
+)
+
+rounding <- function(vars) {
+  case_when(vars == 0 ~ 0,  # zero stays zero
+            vars > 7 ~ round(vars / 5) * 5)  # counts above 7 go to the nearest 5; 1-7 match no branch and become NA
+}
+
+summarise_variable <- function(cohort, var) {  # one frequency table for the rows where column `var` is TRUE
+  cohort <- cohort %>%
+    filter(.data[[var]] == TRUE)
+
+  table_freq_overall <- tibble::tibble(
+    subgroup = "All",
+    category = "All",
+    n = rounding(nrow(cohort)),  # `cohort` is already filtered here, so this is the size of the `var` subset
+    percentage = 100
+  ) %>%
+    mutate(cohort_variable_description = var)
+
+  table_freq <-
+    cohort %>%
+    pivot_longer(
+      cols = all_of(vars_to_summarise),
+      names_to = "subgroup",
+      values_to = "category"
+    ) %>%
+    count(
+      subgroup,
+      category,
+      name = "n") %>%
+    group_by(subgroup) %>%
+    mutate(
+      category = fct_explicit_na(category, "unknown"),
+      n = rounding(n),
+      percentage = round((100 * n / sum(n, na.rm = TRUE)),1),  # share within each subgroup, computed from the rounded counts
+    ) %>%
+    ungroup() %>%
+    mutate(cohort_variable_description = var)
+
+  bind_rows(table_freq_overall,
+            table_freq)
+}
+
+results <- lapply(migration_type_vars, function(var){
+  summarise_variable(cohort,var)
+}) %>%
+  bind_rows()
+
+dir_create(path_dir(output_file))
+write_csv(results, output_file)
+
+
diff --git a/project.yaml b/project.yaml
index 15afe83..cd6bef1 100644
--- a/project.yaml
+++ b/project.yaml
@@ -55,6 +55,22 @@ actions:
       moderately_sensitive:
         csv: output/tables/demographics_full_study_cohort_6cat.csv
 
+  generate_demographics_full_study_table_migration_types:
+    run: r:latest analysis/process_full_cohort_data_migration_types.R
+    needs:
+      - generate_full_study_cohort
+    outputs:
+      moderately_sensitive:
+        csv: output/tables/demographics_full_study_cohort_migration_types.csv
+
+  generate_demographics_uk_entry_cohort:
+    run: r:latest analysis/process_date_of_uk_entry_cohort.R
+    needs:
+      - generate_date_of_entry_cohort
+    outputs:
+      moderately_sensitive:
+        csv: output/tables/demographics_date_of_uk_entry_cohort.csv
+
   generate_demographics_census_2011_study_table_mig_2cat:
     run: r:latest analysis/process_census_cohort_data.R output/cohorts/census_2011_study_cohort.arrow output/tables/demographics_census_2011_cohort_2cat.csv mig_status_2_cat
     needs: