-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtobiom.py
More file actions
67 lines (51 loc) · 1.43 KB
/
tobiom.py
File metadata and controls
67 lines (51 loc) · 1.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import sys
from ete3 import NCBITaxa
# Zero-based positions of the fields read from each tab-separated row of the
# composition file (genome name, relative abundance, lineage string).
genome_col, abundance_col, lineage_col = 0, 2, 5
# Module-wide ete3 NCBITaxa handle, constructed once when the script starts.
# NOTE(review): nothing in the visible code references it; constructing it
# presumably opens ete3's local NCBI taxonomy database -- confirm it is needed.
ncbiTaxa = NCBITaxa()
class Ranks:
    """Taxonomic rank names in NCBI and GTDB notation, with lookups between them.

    ``ncbi`` and ``gtdb`` list the same eight levels in the same order, so
    entries at equal positions correspond to each other. The two dictionaries
    additionally cover ``root`` and ``kingdom``/``k__``, which do not appear
    in the ordered lists.
    """

    # Parallel lists: ncbi[i] and gtdb[i] name the same level.
    ncbi = [
        "superkingdom", "phylum", "class", "order",
        "family", "genus", "species", "strain",
    ]
    gtdb = ["d__", "p__", "c__", "o__", "f__", "g__", "s__", "genome"]

    # NCBI rank name -> GTDB rank prefix.
    ncbi2gtdb = {
        'root': 'root',
        'superkingdom': 'd__',
        'kingdom': 'k__',
        'phylum': 'p__',
        'class': 'c__',
        'order': 'o__',
        'family': 'f__',
        'genus': 'g__',
        'species': 's__',
        'strain': 'genome',
    }

    # GTDB rank prefix -> NCBI rank name. Built by inverting ncbi2gtdb so the
    # two tables cannot drift apart; the mapping is one-to-one, so no entry is
    # lost, and the key order follows ncbi2gtdb.
    gtdb2ncbi = {gtdb_name: ncbi_name for ncbi_name, gtdb_name in ncbi2gtdb.items()}
# Input: path (first command-line argument) to a tab-separated composition
# table with one row per genome and the columns
#   genome  size  rel_abundance  read_count  sequencing_depth  lineage  absolute_path
composition_file = sys.argv[1]

# NOTE(review): rank_dict is never filled, and the rank position located for
# each row below is discarded -- the aggregation step looks unfinished.
rank_dict = dict()

with open(composition_file, 'r') as file:
    # Parsing a row does not depend on the rank, so the file is read once and
    # the ranks are checked per row instead of rewinding the file per rank.
    for line in file:
        line = line.rstrip()
        if not line:
            # A blank (e.g. trailing) line has no columns to index.
            continue
        tokens = line.split('\t')
        genome = tokens[genome_col]
        # float() raises ValueError on non-numeric text, so a header row is
        # not tolerated. NOTE(review): confirm the input has no header.
        abundance = float(tokens[abundance_col])
        taxonomy_str = tokens[lineage_col]
        # Keep only entries longer than 3 characters; this discards empty
        # strings and bare 3-character rank prefixes such as "g__".
        taxonomy = [taxon for taxon in taxonomy_str.split(';') if len(taxon) > 3]
        # Every GTDB rank except the final "genome" level.
        for rank in Ranks.gtdb[:-1]:
            # Leaves x at the first entry carrying this rank's prefix, or at
            # the last entry when none matches.
            for x, taxon in enumerate(taxonomy):
                if taxon.startswith(rank):
                    break