|
1 | 1 | BIOMART = "http://www.ensembl.org/biomart/martservice" |
2 | 2 | """The Url used by Biomart to accept requests""" |
| 3 | + |
3 | 4 | BIOMART_XML_REQUESTS = { |
4 | | - "IDs+desc": { |
5 | | - "query": """<?xml version="1.0" encoding="UTF-8"?> |
| 5 | + "entrez": """<?xml version="1.0" encoding="UTF-8"?> |
6 | 6 | <!DOCTYPE Query> |
7 | | -<Query virtualSchemaName = "default" formatter = "CSV" header = "0" uniqueRows = "0" count = "" datasetConfigVersion = "0.6" > |
| 7 | +<Query virtualSchemaName = "default" formatter = "TSV" header = "1" uniqueRows = "1" datasetConfigVersion = "0.6" > |
8 | 8 |
|
9 | 9 | <Dataset name = "hsapiens_gene_ensembl" interface = "default" > |
10 | 10 | <Filter name = "biotype" value = "protein_coding"/> |
11 | 11 | <Attribute name = "ensembl_gene_id_version" /> |
12 | | - <Attribute name = "ensembl_transcript_id_version" /> |
13 | | - <Attribute name = "description" /> |
14 | | - <Attribute name = "external_gene_name" /> |
15 | | - <Attribute name = "ensembl_peptide_id_version" /> |
16 | | - <Attribute name = "entrezgene_id" /> |
17 | | - <Attribute name = "pdb" /> |
18 | | - <Attribute name = "refseq_mrna" /> |
| 12 | + <Attribute name = "entrezgene_id" /> |
19 | 13 | </Dataset> |
20 | 14 | </Query>""", |
21 | | - "colnames": [ |
22 | | - "ensembl_gene_id_version", |
23 | | - "ensembl_transcript_id_version", |
24 | | - "description", |
25 | | - "external_gene_name", |
26 | | - "ensembl_peptide_id_version", |
27 | | - "entrezgene_id", |
28 | | - "pdb", |
29 | | - "refseq_mrna", |
30 | | - ], |
31 | | - }, |
32 | | - "hugo_symbols": { |
33 | | - "query": """<?xml version="1.0" encoding="UTF-8"?> |
| 15 | + "IDs": """<?xml version="1.0" encoding="UTF-8"?> |
34 | 16 | <!DOCTYPE Query> |
35 | | -<Query virtualSchemaName = "default" formatter = "CSV" header = "0" uniqueRows = "0" count = "" datasetConfigVersion = "0.6" > |
| 17 | +<Query virtualSchemaName = "default" formatter = "TSV" header = "1" uniqueRows = "1" datasetConfigVersion = "0.6" > |
36 | 18 |
|
37 | 19 | <Dataset name = "hsapiens_gene_ensembl" interface = "default" > |
38 | 20 | <Filter name = "biotype" value = "protein_coding"/> |
39 | | - <Attribute name = "hgnc_id" /> |
40 | | - <Attribute name = "hgnc_symbol" /> |
41 | 21 | <Attribute name = "ensembl_gene_id_version" /> |
| 22 | + <Attribute name = "ensembl_transcript_id_version" /> |
42 | 23 | </Dataset> |
43 | 24 | </Query>""", |
44 | | - "colnames": ["hgnc_id", "hgnc_symbol", "ensembl_gene_id_version"], |
45 | | - }, |
46 | | - "IDs": { |
47 | | - "query": """<?xml version="1.0" encoding="UTF-8"?> |
| 25 | + "proteins": """<?xml version="1.0" encoding="UTF-8"?> |
48 | 26 | <!DOCTYPE Query> |
49 | | -<Query virtualSchemaName = "default" formatter = "CSV" header = "0" uniqueRows = "0" count = "" datasetConfigVersion = "0.6" > |
| 27 | +<Query virtualSchemaName = "default" formatter = "TSV" header = "1" uniqueRows = "1" datasetConfigVersion = "0.6" > |
50 | 28 |
|
51 | 29 | <Dataset name = "hsapiens_gene_ensembl" interface = "default" > |
52 | 30 | <Filter name = "biotype" value = "protein_coding"/> |
53 | | - <Attribute name = "ensembl_gene_id" /> |
54 | | - <Attribute name = "ensembl_transcript_id" /> |
55 | | - <Attribute name = "ensembl_peptide_id" /> |
56 | | - <Attribute name = "version" /> |
57 | | - <Attribute name = "transcript_version" /> |
58 | | - <Attribute name = "peptide_version" /> |
| 31 | + <Attribute name = "ensembl_transcript_id_version" /> |
| 32 | + <Attribute name = "ensembl_peptide_id_version" /> |
| 33 | + <Attribute name = "pdb" /> |
59 | 34 | <Attribute name = "refseq_mrna" /> |
60 | | - <Attribute name = "refseq_peptide" /> |
| 35 | + <Attribute name = "refseq_peptide" /> |
| 36 | + </Dataset> |
| 37 | +</Query>""", |
| 38 | + "gene_names": """<?xml version="1.0" encoding="UTF-8"?> |
| 39 | +<!DOCTYPE Query> |
| 40 | +<Query virtualSchemaName = "default" formatter = "TSV" header = "1" uniqueRows = "1" datasetConfigVersion = "0.6" > |
| 41 | +
|
| 42 | + <Dataset name = "hsapiens_gene_ensembl" interface = "default" > |
| 43 | + <Filter name = "biotype" value = "protein_coding"/> |
| 44 | + <Attribute name = "hgnc_id" /> |
| 45 | + <Attribute name = "hgnc_symbol" /> |
| 46 | + <Attribute name = "description" /> |
| 47 | + <Attribute name = "ensembl_gene_id_version" /> |
61 | 48 | </Dataset> |
62 | 49 | </Query>""", |
63 | | - "colnames": [ |
64 | | - "ensembl_gene_id", |
65 | | - "ensembl_transcript_id", |
66 | | - "ensembl_peptide_id", |
67 | | - "version", |
68 | | - "transcript_version", |
69 | | - "peptide_version", |
70 | | - "refseq_mrna", |
71 | | - "refseq_peptide", |
72 | | - ], |
73 | | - }, |
74 | 50 | } |
75 | 51 | """Hardpoints with Biomart data. |
76 | 52 |
|
77 | | -In the form of 'table_name': {'query': xlm_query, 'colnames': [list of colnames]} |
| 53 | +In the form of 'table_name': 'xml_query' |
78 | 54 | """ |
79 | 55 |
|
80 | 56 | TCDB = { |
|
104 | 80 |
|
105 | 81 | IUPHAR_DB = "https://www.guidetopharmacology.org/DATA/public_iuphardb_v2022.2.zip" |
106 | 82 | """URL to the download of the full IUPHAR database""" |
| 83 | + |
107 | 84 | IUPHAR_COMPILED = { |
108 | 85 | "targets+families": "https://www.guidetopharmacology.org/DATA/targets_and_families.csv", |
109 | 86 | "ligands": "https://www.guidetopharmacology.org/DATA/ligands.csv", |
|
112 | 89 | """URLs to the compiled IUPHAR data from their downloads page""" |
113 | 90 |
|
114 | 91 | HUGO = { |
115 | | - "nomenclature": "http://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/monthly/tsv/hgnc_complete_set_2021-03-01.txt", |
| 92 | + "nomenclature": "https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/monthly/tsv/hgnc_complete_set_2023-04-01.txt", |
116 | 93 | "groups": { |
117 | 94 | # I could download json files, but most of the data is flat anyway, so... |
118 | 95 | "endpoint": "https://www.genenames.org/cgi-bin/genegroup/download?id={id}&type=branch", |
|
138 | 115 | """Hugo downloads as found on their download pages""" |
139 | 116 |
|
140 | 117 | SLC_TABLES = "http://slc.bioparadigms.org/" |
| 118 | +"""URL to the SLC tables that have data regarding solute carriers""" |
0 commit comments