Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion OT_SCHEMA_VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
917e836
v26.03.0
3 changes: 2 additions & 1 deletion cmat/output_generation/clinvar_to_evidence_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ def validate_evidence_string(ev_string, ot_schema_contents):
def launch_pipeline(clinvar_xml_file, efo_mapping_file, gene_mapping_file, ot_schema_file, dir_out, start, end):
os.makedirs(dir_out, exist_ok=True)
ot_schema_contents = json.loads(open(ot_schema_file).read())
string_to_efo_mappings, _, nonmatching_mappings = load_ontology_mapping(efo_mapping_file, ot_schema_contents)
ontology_id_regex = ot_schema_contents['definitions']['diseaseFromSourceMappedId']['pattern']
string_to_efo_mappings, _, nonmatching_mappings = load_ontology_mapping(efo_mapping_file, ontology_id_regex)
variant_to_gene_mappings = CT.process_consequence_type_file(gene_mapping_file)

# Output mappings that don't conform to the schema in a separate file
Expand Down
5 changes: 1 addition & 4 deletions cmat/trait_mapping/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def string_to_preferred_ontologies(ontology_string, target_ontology):
return preferred_ontologies


def load_ontology_mapping(trait_mapping_file, schema=None):
def load_ontology_mapping(trait_mapping_file, ontology_id_regex='.*'):
"""
Load ontology mappings from a TSV file.

Expand All @@ -50,9 +50,6 @@ def load_ontology_mapping(trait_mapping_file, schema=None):
target_ontology = 'EFO'
n_ontology_mappings = 0
in_header = True
ontology_id_regex = '.*'
if schema:
ontology_id_regex = schema['definitions']['diseaseFromSourceMappedId']['pattern']
nonmatching_mappings = []

with open(trait_mapping_file, 'rt') as f:
Expand Down
2 changes: 1 addition & 1 deletion tests/output_generation/evaluation/test_ols_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def test_fetch_eval_data():

def test_fetch_eval_data_include_neighbors():
expected = ('MONDO:0004975', False, {'MONDO:0004975'},
{'EFO:0005815', 'MONDO:0001627'},
{'MONDO:0005574', 'MONDO:0001627'},
{'MONDO:0010422', 'MONDO:0014036', 'MONDO:0014265', 'MONDO:0014316', 'MONDO:0100087'})
assert fetch_eval_data(db_iden=('MONDO', 'MONDO:0004975'), include_neighbors=True) == expected

Expand Down
13 changes: 2 additions & 11 deletions tests/output_generation/test_clinvar_to_evidence_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,8 @@ def test_efo_mapping(self):
('http://www.ebi.ac.uk/efo/EFO_0001645', 'coronary heart disease')]

def test_efo_mapping_with_schema(self):
schema = {
"definitions": {
"diseaseFromSourceMappedId": {
"type": "string",
"description": "Identifier of the disease in the EFO ontology",
"pattern": "(^NCIT_|^Orphanet_|^GO_|^HP_|^EFO_|^MONDO_|^DOID_|^MP_|^OTAR_|^PATO_|^OBI_|^OBA_|^OGMS_|^GSSO_|^UBERON_)",
"examples": ["EFO_0005537"]
}
}
}
mappings, _, nonmatching_mappings = load_ontology_mapping(config.efo_mapping_file, schema)
ontology_id_regex = "(^NCIT_|^Orphanet_|^GO_|^HP_|^EFO_|^MONDO_|^DOID_|^MP_|^OTAR_|^PATO_|^OBI_|^OBA_|^OGMS_|^GSSO_|^UBERON_)"
mappings, _, nonmatching_mappings = load_ontology_mapping(config.efo_mapping_file, ontology_id_regex)
assert len(mappings) == 10
assert 'tbc1 domain family member 24' not in mappings
assert len(nonmatching_mappings) == 1
Expand Down
4 changes: 2 additions & 2 deletions tests/trait_mapping/resources/string_to_ontology_mappings.tsv
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#clinvar_trait_name uri label
11p partial monosomy syndrome http://purl.obolibrary.org/obo/MONDO_0008681 WAGR syndrome
3 beta-hydroxysteroid dehydrogenase deficiency http://www.orpha.net/ORDO/Orphanet_90791 Congenital adrenal hyperplasia due to 3-beta-hydroxysteroid dehydrogenase deficiency
coronary artery disease/myocardial infarction http://www.ebi.ac.uk/efo/EFO_0000612 myocardial infarction
coronary artery disease/myocardial infarction http://www.ebi.ac.uk/efo/EFO_0001645 coronary heart disease
coronary artery disease/myocardial infarction http://purl.obolibrary.org/obo/MONDO_0005068 myocardial infarction
coronary artery disease/myocardial infarction http://purl.obolibrary.org/obo/MONDO_0005010 coronary heart disease
frontotemporal dementia, ubiquitin-positive http://www.orpha.net/ORDO/Orphanet_282 Frontotemporal dementia
meckel syndrome, type 3 http://purl.obolibrary.org/obo/MONDO_0018921 Meckel syndrome
renal-hepatic-pancreatic dysplasia 2 http://www.orpha.net/ORDO/Orphanet_294415 Renal-hepatic-pancreatic dysplasia
Expand Down
2 changes: 1 addition & 1 deletion tests/trait_mapping/test_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def test_find_replacement_mapping():
# Deprecated in EFO but replacement is also deprecated, so use its replacement
assert find_replacement_mapping(
trait_name, 'http://www.orpha.net/ORDO/Orphanet_226316', target_ontology, preferred_ontologies
) == 'http://purl.obolibrary.org/obo/MONDO_0011792|thyroid dyshormonogenesis 6|TOKEN_MATCH_SYNONYM|MONDO_HP_NOT_EFO'
) == 'http://purl.obolibrary.org/obo/MONDO_0011792|thyroid dyshormonogenesis 6|TOKEN_MATCH_SYNONYM|EFO_CURRENT'


def test_to_mapping_string():
Expand Down
4 changes: 2 additions & 2 deletions tests/trait_mapping/test_trait_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ def test_ols_exact_ascii_match(self):
# Search should be agnostic to accents and other non-ASCII characters
trait = Trait('pelger-huët anomaly', None, None)
processed_trait = self.run_process_trait(trait)
assert len(processed_trait.ols_result_list) == 11
assert len(processed_trait.ols_result_list) == 10
assert processed_trait.is_finished
assert {m.uri for m in processed_trait.finished_mapping_set} == {'http://www.ebi.ac.uk/efo/EFO_1001093'}
assert {m.uri for m in processed_trait.finished_mapping_set} == {'http://purl.obolibrary.org/obo/MONDO_0008214'}

def test_multiple_mappings(self):
# Multiple mappings from OLS
Expand Down
Loading