-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathProtocol1.R
More file actions
61 lines (51 loc) · 2.04 KB
/
Protocol1.R
File metadata and controls
61 lines (51 loc) · 2.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# sourcing the protocols required functions
# Packeges required:
# ape, parallel, Rfast, Rfast2
source('scripts/mitoP.R')
## To create the mitoP "full_data"
#1 reading the individual alignments
full_data_loci <- lapply(X = dir(path = 'main_datasets/mitoP_original_seqIDs/',
pattern = '\\.fast$',
full.names = TRUE),
function(x) {
x <- as.matrix(read.FASTA(file = x,
type = 'AA'));
x[sort(labels(x)),]
})
#2 Remove sequence unique identifiers prior to concatenation
full_data_loci <- lapply(full_data_loci, function(z) {
updateLabel(x = z,
old = rownames(z),
new = sub("^((?:[^_]+_){3}).*", "\\1", rownames(z)))
})
#3 Concatenate
mitoP_full_data <- catAAbin(full_data_loci)
# Write 'mitoP_full_data.fasta' file:
write.FASTA(x = mitoP_full_data,
file = "main_datasets/mitoP_full_data.fasta")
## To Generate the mitoP "full_data + P1 mask"
#1) create the subsamples set:
# For reproducibility, the subsets and their inferred ML trees used in the
# paper are saved in the object mitoP_P1_subsets.RData in the path
# (intermediate/P1/subsets)
mitoP_P1_subsets <- subsamG(x = full_data_loci, y = 14, z = 2000) # This will generate 1976 subsamples
## To reproduce the analyses of the paper:
load('intermediate_data/P1/P1_subsets_trees.RData')
P1_results <- txerr(tr = P1_subsets_trees[[4]],
subg = P1_subsets_trees,
rm = 0.10,
rgn = 0.5,
rtx = 0.5,
outrgx = "^ub")
# Write 'mitoP_P1_mask.fasta' file:
write.FASTA(x = P1_results[[1]],
file = "main_datasets/mitoP_P1_data.fasta")
# Write updated single gene alignment
dir.create( p1out <- 'P1_output')
gaps <- charToRaw(c('-?xX'))
invisible(lapply(seq_along(P1_results[[2]]), function(z) {
z1 <- P1_results[[2]][[z]]
z2 <- apply(z1, 1, function(z3) !all(z3 %in% gaps) )
z1 <- z1[z2, ]
write.FASTA(x = z1, file = sprintf('P1_output/p1mitoP%02d.fast', z))
}))