Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions analysis/dataset_definition_census_cohorts.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,14 @@
dataset.mig_status_6_cat = migration_status_variables.build_mig_status_6_cat(
migrant_indicators)

dataset.mig_status_2_cat_withdoe = migration_status_variables.build_mig_status_2_cat_withdoe(migrant_indicators)

dataset.mig_status_3_cat_withdoe = migration_status_variables.build_mig_status_3_cat_withdoe(
migrant_indicators)

dataset.mig_status_6_cat_withdoe = migration_status_variables.build_mig_status_6_cat_withdoe(
migrant_indicators)

dataset.configure_dummy_data(population_size=1000)

show(dataset)
19 changes: 17 additions & 2 deletions analysis/dataset_definition_full_study_cohort.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,13 +153,19 @@
## consolidate migration indicators into 2-cat, 3-cat and 6-cat variables

dataset.mig_status_2_cat = migration_status_variables.build_mig_status_2_cat(migrant_indicators)
dataset.mig_status_2_cat_withdoe = migration_status_variables.build_mig_status_2_cat_withdoe(migrant_indicators)

dataset.mig_status_3_cat = migration_status_variables.build_mig_status_3_cat(
migrant_indicators)
dataset.mig_status_3_cat_withdoe = migration_status_variables.build_mig_status_3_cat_withdoe(
migrant_indicators)

dataset.mig_status_6_cat = migration_status_variables.build_mig_status_6_cat(
migrant_indicators
)
dataset.mig_status_6_cat_withdoe = migration_status_variables.build_mig_status_6_cat_withdoe(
migrant_indicators
)

# number of migration codes per person

Expand All @@ -172,12 +178,21 @@
)
dataset.number_of_migration_codes = number_of_migration_codes

# SNOMED CT code for "date of entry to the UK"
date_of_entry_code = ["860021000000109"]

# Count, per patient, the clinical events carrying either a migration code or
# the date-of-UK-entry code. Each filter is named separately for readability.
_is_migration_or_entry_code = (
    (clinical_events.snomedct_code.is_in(codelists.all_migrant_codes))
    | (clinical_events.snomedct_code.is_in(date_of_entry_code))
)
# event recorded between the patient's date of birth and the study end date
_is_in_study_window = clinical_events.date.is_on_or_between(
    patients.date_of_birth, study_end_date
)
# event recorded no later than the date of death, when a date of death exists
_is_not_after_death = (
    (clinical_events.date.is_on_or_before(patients.date_of_death))
    | (patients.date_of_death.is_null())
)

number_of_migration_codes_withdoe = (
    clinical_events
    .where(_is_migration_or_entry_code)
    .where(_is_in_study_window)
    .where(_is_not_after_death)
    .count_for_patient()
)
dataset.number_of_migration_codes_withdoe = number_of_migration_codes_withdoe

# date of entry to the UK (SNOMED CT code: 860021000000109)

## has date of entry to the UK code

date_of_entry_code = ["860021000000109"]

has_date_of_uk_entry = (
clinical_events
.where(clinical_events.snomedct_code.is_in(date_of_entry_code))
Expand Down
14 changes: 0 additions & 14 deletions analysis/generate_annual_migrant_counts_2cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,3 @@
else:
name = f"{var_name}_{safe_label}_{suffix}"
measures.define_measure(name=name, numerator=bool_numer, group_by=group)

# overall (ungrouped) measures

# for label in labels:
# bool_numer = (mig2_expr == label)
# safe_label = label.lower().replace(" ", "_").replace("-", "_")
# var_name = "mig_status_2_cat_overall"

# name = f"{var_name}_{safe_label}"

# measures.define_measure(
# name=name,
# numerator=bool_numer
# )
31 changes: 31 additions & 0 deletions analysis/generate_annual_migrant_counts_2cat_withdoe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from ehrql import create_measures, INTERVAL
from ehrql.tables.tpp import patients, practice_registrations, clinical_events, addresses
import migration_status_variables
from analysis import utilities
import codelists

measures = create_measures()
measures.configure_dummy_data(population_size=1000)
measures.configure_disclosure_control(enabled=True)  # enable on real data

# Shared denominator, intervals and subgroup breakdowns used by every measure.
common = utilities.build_common_vars(INTERVAL)
measures.define_defaults(
    denominator=common["denominator"],
    intervals=common["intervals"],
)
subgroups = common["subgroups"]

# Per-patient indicators as of the interval end date, collapsed into the
# 2-category status that also takes the date-of-UK-entry indicator into account.
numerators_separate = migration_status_variables.build_migrant_indicators(
    INTERVAL.end_date
)
mig2_expr = migration_status_variables.build_mig_status_2_cat_withdoe(
    numerators_separate
)

# Register one measure per label x subgroup. `subgroups` maps a name suffix to
# a group_by value; an empty suffix produces the un-suffixed measure name.
var_name = "mig_status_2_cat"
labels = ["Migrant", "Non-migrant"]
for label in labels:
    bool_numer = mig2_expr == label
    safe_label = label.lower().replace(" ", "_").replace("-", "_")
    base_name = f"{var_name}_{safe_label}"
    for suffix, group in subgroups.items():
        name = base_name if suffix == "" else f"{base_name}_{suffix}"
        measures.define_measure(name=name, numerator=bool_numer, group_by=group)
14 changes: 0 additions & 14 deletions analysis/generate_annual_migrant_counts_3cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,3 @@
else:
name = f"{var_name}_{safe_label}_{suffix}"
measures.define_measure(name=name, numerator=bool_numer, group_by=group)

# overall (ungrouped) measures

# for label in labels:
# bool_numer = (mig3_expr == label)
# safe_label = label.lower().replace(" ", "_").replace("-", "_")
# var_name = "mig_status_3_cat_overall"

# name = f"{var_name}_{safe_label}"

# measures.define_measure(
# name=name,
# numerator=bool_numer
# )
32 changes: 32 additions & 0 deletions analysis/generate_annual_migrant_counts_3cat_withdoe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@

from ehrql import create_measures, INTERVAL
from ehrql.tables.tpp import patients, practice_registrations, clinical_events, addresses
import migration_status_variables
from analysis import utilities

measures = create_measures()
measures.configure_dummy_data(population_size=1000)
measures.configure_disclosure_control(enabled=True)  # enable on real data

# Shared denominator, intervals and subgroup breakdowns used by every measure.
common = utilities.build_common_vars(INTERVAL)
measures.define_defaults(
    denominator=common["denominator"],
    intervals=common["intervals"],
)
subgroups = common["subgroups"]
# NOTE(review): `ethnicity` is not referenced again in the visible part of this
# script; kept so the lookup on `common` still happens exactly as before.
ethnicity = common["ethnicity"]

# Per-patient indicators as of the interval end date, collapsed into the
# 3-category status that also takes the date-of-UK-entry indicator into account.
numerators_separate = migration_status_variables.build_migrant_indicators(
    INTERVAL.end_date
)
mig3_expr = migration_status_variables.build_mig_status_3_cat_withdoe(
    numerators_separate
)

# Register one measure per label x subgroup. `subgroups` maps a name suffix to
# a group_by value; an empty suffix produces the un-suffixed measure name.
var_name = "mig_status_3_cat"
labels = ["Migrant", "Non-migrant", "Unknown"]
for label in labels:
    bool_numer = mig3_expr == label
    safe_label = label.lower().replace(" ", "_").replace("-", "_")
    base_name = f"{var_name}_{safe_label}"
    for suffix, group in subgroups.items():
        name = base_name if suffix == "" else f"{base_name}_{suffix}"
        measures.define_measure(name=name, numerator=bool_numer, group_by=group)
14 changes: 0 additions & 14 deletions analysis/generate_annual_migrant_counts_6cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,3 @@
else:
name = f"{var_name}_{safe_label}_{suffix}"
measures.define_measure(name=name, numerator=bool_numer, group_by=group)

# # overall (ungrouped) measures

# for label in labels:
# bool_numer = (mig6_expr == label)
# safe_label = label.lower().replace(" ", "_").replace("-", "_")
# var_name = "mig_status_6_cat_overall"

# name = f"{var_name}_{safe_label}"

# measures.define_measure(
# name=name,
# numerator=bool_numer
# )
39 changes: 39 additions & 0 deletions analysis/generate_annual_migrant_counts_6cat_withdoe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@

from ehrql import create_measures, INTERVAL
from ehrql.tables.tpp import patients, practice_registrations, clinical_events, addresses
import migration_status_variables
from analysis import utilities

measures = create_measures()
measures.configure_dummy_data(population_size=1000)
measures.configure_disclosure_control(enabled=True)  # enable on real data

# Shared denominator, intervals and subgroup breakdowns used by every measure.
common = utilities.build_common_vars(INTERVAL)
measures.define_defaults(
    denominator=common["denominator"],
    intervals=common["intervals"],
)
subgroups = common["subgroups"]
# NOTE(review): `ethnicity` is not referenced again in the visible part of this
# script; kept so the lookup on `common` still happens exactly as before.
ethnicity = common["ethnicity"]

# Per-patient indicators as of the interval end date, collapsed into the
# 6-category status that also takes the date-of-UK-entry indicator into account.
numerators_separate = migration_status_variables.build_migrant_indicators(
    INTERVAL.end_date
)
mig6_expr = migration_status_variables.build_mig_status_6_cat_withdoe(
    numerators_separate
)

# Register one measure per label x subgroup. `subgroups` maps a name suffix to
# a group_by value; an empty suffix produces the un-suffixed measure name.
var_name = "mig_status_6_cat"
labels = [
    "Definite migrant",
    "Highly likely migrant",
    "Likely migrant",
    "Definite non-migrant",
    "Likely non-migrant",
    "Unknown",
]
for label in labels:
    bool_numer = mig6_expr == label
    safe_label = label.lower().replace(" ", "_").replace("-", "_")
    base_name = f"{var_name}_{safe_label}"
    for suffix, group in subgroups.items():
        name = base_name if suffix == "" else f"{base_name}_{suffix}"
        measures.define_measure(name=name, numerator=bool_numer, group_by=group)
12 changes: 0 additions & 12 deletions analysis/generate_annual_migrant_counts_migration_status_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,3 @@

measures.define_measure(name=name, numerator=expr, group_by=group)

# Overall (no grouping) counts

# var_name = "migration_status_types_overall"

# for key, expr in numerators_separate.items():
# safe_label = key.lower().replace(" ", "_").replace("-", "_")
# name = f"{var_name}_{safe_label}"

# measures.define_measure(
# name=name,
# numerator=expr
# )
77 changes: 76 additions & 1 deletion analysis/migration_status_variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
"english_not_main_language": codelists.english_not_main_language_excl_interpreter_migrant_codes,
"interpreter_required": codelists.interpreter_migrant_codes,
"trafficking": codelists.trafficking_codes,
"british_ethnicities": codelists.british_ethnicities_codes
"british_ethnicities": codelists.british_ethnicities_codes,
"date_of_uk_entry": ["860021000000109"]
}

def build_migrant_indicators(date):
Expand Down Expand Up @@ -41,6 +42,22 @@ def build_mig_status_2_cat(migrant_indicators):
otherwise="Non-migrant"
)

def build_mig_status_2_cat_withdoe(migrant_indicators):
    """
    2-category migrant status that also counts the date-of-UK-entry indicator:
    - "Migrant" if migrant_indicators["any_migrant"] OR
      migrant_indicators["date_of_uk_entry"] holds
    - "Non-migrant" otherwise

    Args:
        migrant_indicators: mapping keyed by indicator name. The keys
            "any_migrant" and "date_of_uk_entry" are read; a missing key
            falls back to the Python value False.

    Returns:
        The `case(...)` expression selecting one of the two labels.
    """
    has_migrant_code = migrant_indicators.get("any_migrant", False)
    has_uk_entry_code = migrant_indicators.get("date_of_uk_entry", False)

    return case(
        when(has_migrant_code | has_uk_entry_code).then("Migrant"),
        otherwise="Non-migrant",
    )

def build_mig_status_3_cat(migrant_indicators):
"""
3-category migrant status:
Expand All @@ -60,6 +77,27 @@ def build_mig_status_3_cat(migrant_indicators):
otherwise="Unknown"
)

def build_mig_status_3_cat_withdoe(migrant_indicators):
    """
    3-category migrant status that also counts the date-of-UK-entry indicator.
    The `when` clauses are listed in this order:
    - "Migrant" if migrant_indicators["any_migrant"] OR
      migrant_indicators["date_of_uk_entry"] holds
    - "Non-migrant" if born_in_uk, OR (british_ethnicities AND the "Migrant"
      condition above does not hold)
    - "Unknown" otherwise

    Args:
        migrant_indicators: mapping keyed by indicator name. The keys
            "any_migrant", "date_of_uk_entry", "born_in_uk" and
            "british_ethnicities" are read; a missing key falls back to the
            Python value False.

    Returns:
        The `case(...)` expression selecting one of the three labels.
    """
    has_migrant_code = migrant_indicators.get("any_migrant", False)
    has_uk_entry_code = migrant_indicators.get("date_of_uk_entry", False)
    uk_born = migrant_indicators.get("born_in_uk", False)
    british_ethnicity = migrant_indicators.get("british_ethnicities", False)

    is_migrant = has_migrant_code | has_uk_entry_code
    # British ethnicity only counts towards "Non-migrant" when there is no
    # migrant evidence; born_in_uk is not restricted in that way here.
    british_without_migrant_evidence = (british_ethnicity) & ~is_migrant
    is_non_migrant = uk_born | british_without_migrant_evidence

    return case(
        when(is_migrant).then("Migrant"),
        when(is_non_migrant).then("Non-migrant"),
        otherwise="Unknown",
    )

def build_mig_status_6_cat(migrant_indicators):
"""
6-category migrant status (priority order):
Expand Down Expand Up @@ -95,3 +133,40 @@ def build_mig_status_6_cat(migrant_indicators):
when(unknown).then("Unknown"),
otherwise="Error"
)

def build_mig_status_6_cat_withdoe(migrant_indicators):
    """
    6-category migrant status that treats the date-of-UK-entry indicator as
    "Likely migrant" evidence. The `when` clauses are listed in this order:
    - Definite migrant: not_born_in_uk
    - Definite non-migrant: born_in_uk
    - Highly likely migrant: immig_status_excl_refugee_asylum OR refugee_asylum_status
    - Likely migrant: english_not_main_language OR interpreter_required
      OR trafficking OR date_of_uk_entry
    - Likely non-migrant: british_ethnicities AND NOT any_migrant
    - Unknown: NOT any_migrant
    - Error: none of the above

    Args:
        migrant_indicators: mapping keyed by indicator name. Every key named
            above is read; a missing key falls back to the Python value False.

    Returns:
        The `case(...)` expression selecting one of the labels.
    """
    any_migrant_code = migrant_indicators.get("any_migrant", False)
    born_abroad = migrant_indicators.get("not_born_in_uk", False)
    immigration_status = migrant_indicators.get("immig_status_excl_refugee_asylum", False)
    refugee_or_asylum = migrant_indicators.get("refugee_asylum_status", False)
    non_english_main_language = migrant_indicators.get("english_not_main_language", False)
    needs_interpreter = migrant_indicators.get("interpreter_required", False)
    trafficked = migrant_indicators.get("trafficking", False)
    uk_born = migrant_indicators.get("born_in_uk", False)
    british_ethnicity = migrant_indicators.get("british_ethnicities", False)
    has_uk_entry_code = migrant_indicators.get("date_of_uk_entry", False)

    # NOTE(review): the two negations below use "any_migrant" only, not
    # date_of_uk_entry. Patients with an entry code are already caught by the
    # "Likely migrant" clause, which is listed earlier. Whether "any_migrant"
    # itself covers the entry code depends on build_migrant_indicators -
    # TODO confirm.
    no_migrant_code = ~any_migrant_code

    return case(
        when(born_abroad).then("Definite migrant"),
        when(uk_born).then("Definite non-migrant"),
        when(immigration_status | refugee_or_asylum).then("Highly likely migrant"),
        when(
            non_english_main_language
            | needs_interpreter
            | trafficked
            | has_uk_entry_code
        ).then("Likely migrant"),
        when((british_ethnicity) & no_migrant_code).then("Likely non-migrant"),
        when(no_migrant_code).then("Unknown"),
        otherwise="Error",
    )
Loading