-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathplot_trees.R
More file actions
344 lines (316 loc) · 10.7 KB
/
plot_trees.R
File metadata and controls
344 lines (316 loc) · 10.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
#######################
# Author: B.M. Anderson
# Date: 16 Mar 2020
# Modified: Nov 2023 (simplified and made more dependent on input text files), Sep 2024
# Feb 2026 (added cladogram option and associated rooting without edge lengths)
# Description: plot input Newick trees with ape, potentially rooting them
#######################
# load libraries
suppressMessages(library(ape))
suppressMessages(library(phytools))
# a help function for when the script is called without arguments or incorrectly
help <- function(help_message) {
if (missing(help_message)) {
cat("A script to plot a set of unrooted Newick trees, or rooted if outgroups are specified\n\n")
cat("Usage: Rscript plot_trees.R <options> tree_file1 tree_file2...\n")
cat("Options:\n")
cat("\t-b\tBootstrap support level below which node labels are not displayed [optional]\n")
cat("\t-c\tColours for tips in a text file, listing taxon and colour (tab separated, one per line) [optional]\n")
cat("\t\tNote: needs a \"taxon\" entry in the samples file to know which to colour\n")
cat("\t-l\tLabels to title the trees in a text file (one per line, same order as the files) [optional]\n")
cat("\t-o\tOutgroup sampleIDs to use for rooting in a text file (one per line) [optional]\n")
cat("\t-s\tSampleIDs, display names, and taxa in a text file (tab separated, one per line) [optional]\n")
cat("\t-svg\tFlag for whether to output an SVG file for each tree [default: do not]\n")
cat("\t-w\tFlag for whether to write the tips in the order plotted, from base of tree up\n")
cat("\t-clado\tFlag for whether to plot as cladograms (no meaning to branch lengths) [default: do not]\n\n")
} else {
cat(help_message)
}
}
# parse the command line
args <- commandArgs(trailingOnly = TRUE)
if (length(args) == 0) { # nolint
stop(help(), call. = FALSE)
} else {
catch_args <- vector("list")
extra <- 1
catch <- TRUE
bootstrap <- 0
colours_present <- FALSE
colours_file <- ""
labels_present <- FALSE
labels_file <- ""
outgroup_present <- FALSE
outgroup_file <- ""
samples_present <- FALSE
samples_file <- ""
svg_out <- FALSE
write_tips <- FALSE
plot_clado <- FALSE
for (index in seq_len(length(args))) {
if (args[index] == "-b") {
bootstrap <- as.numeric(args[index + 1])
catch <- FALSE
} else if (args[index] == "-c") {
colours_present <- TRUE
colours_file <- args[index + 1]
catch <- FALSE
} else if (args[index] == "-l") {
labels_present <- TRUE
labels_file <- args[index + 1]
catch <- FALSE
} else if (args[index] == "-o") {
outgroup_present <- TRUE
outgroup_file <- args[index + 1]
catch <- FALSE
} else if (args[index] == "-s") {
samples_present <- TRUE
samples_file <- args[index + 1]
catch <- FALSE
} else if (args[index] == "-svg") {
svg_out <- TRUE
} else if (args[index] == "-w") {
write_tips <- TRUE
} else if (args[index] == "-clado") {
plot_clado <- TRUE
} else {
if (catch) {
catch_args[extra] <- args[index]
extra <- extra + 1
} else {
catch <- TRUE
}
}
}
}
if (length(catch_args) < 1) {
stop(help("Missing tree file(s)!\n"), call. = FALSE)
}
cat("Bootstrap threshold for node label printing is", bootstrap, "\n")
# read in and parse the files
if (colours_present) {
colour_table <- read.table(colours_file, sep = "\t", header = FALSE)
}
if (labels_present) {
labels <- read.table(labels_file, sep = "\t", header = FALSE)[, 1]
cat("Labels for tree titles are:", unlist(labels), "\n")
}
if (outgroup_present) {
outgroups <- read.table(outgroup_file, sep = "\t", header = FALSE)[, 1]
cat("Outgroups are:", unlist(outgroups), "\n")
}
if (samples_present) {
sample_table <- read.table(samples_file, sep = "\t", header = FALSE)
if (ncol(sample_table) == 2) {
colnames(sample_table) <- c("ID", "label")
} else if (ncol(sample_table) == 3) {
colnames(sample_table) <- c("ID", "label", "taxon")
} else {
stop(help("Samples table formatted improperly!\n"), call. = FALSE)
}
}
tree_list <- vector("list")
for (index in seq_len(length(catch_args))) {
tree <- read.tree(catch_args[[index]])
tree_list[[index]] <- tree
}
# check that trees have enough titles, if provided, or generate based on file name
if (labels_present) {
if (length(labels) != length(tree_list)) {
stop(help("The number of trees and labels do not match!\n"), call. = FALSE)
}
} else {
labels <- vector("list")
for (index in seq_len(length(tree_list))) {
labels[[index]] <- basename(catch_args[[index]])
}
}
# root the trees if outgroups are present
if (outgroup_present) {
for (index in seq_len(length(tree_list))) {
tree <- tree_list[[index]]
if (sum(outgroups %in% tree$tip.label) > 0) {
these_outgroups <- outgroups[outgroups %in% tree$tip.label]
if (length(these_outgroups) > 1) {
if (is.monophyletic(tree, as.character(these_outgroups))) {
rootnode <- getMRCA(tree, as.character(these_outgroups))
if (plot_clado) {
rooted_tree <- root(tree, node = rootnode,
resolve.root = TRUE, edgelabel = TRUE)
} else {
rootnode <- getMRCA(tree, as.character(these_outgroups))
position <- 0.5 * tree$edge.length[which(tree$edge[, 2] == rootnode)]
rooted_tree <- reroot(tree, rootnode, position, edgelabel = TRUE)
}
tree_list[[index]] <- rooted_tree
} else {
cat("Tree", basename(catch_args[[index]]),
"does not have monophyletic outgroups, so it is not rooted\n")
}
} else {
if (plot_clado) {
rooted_tree <- root(tree, as.character(these_outgroups), resolve.root = TRUE,
edgelabel = TRUE)
} else {
tip_number <- which(tree$tip.label == these_outgroups)
rootnode <- tree$edge[tree$edge[, 2] == tip_number, 1]
position <- 0.5 * tree$edge.length[which(tree$edge[, 2] == rootnode)]
rooted_tree <- reroot(tree, rootnode, position, edgelabel = TRUE)
}
tree_list[[index]] <- rooted_tree
}
} else {
cat("Tree", basename(catch_args[[index]]),
"has no outgroups, so it is not rooted\n")
}
}
}
# Remove node labels if less than specified
# also determine the tree with the most taxa for dimensions
max_tips <- 0
for (index in seq_len(length(tree_list))) {
for (index2 in seq_len(length(tree_list[[index]]$node.label))) {
if (tree_list[[index]]$node.label[index2] == "") {
tree_list[[index]]$node.label[index2] <- ""
} else if (tree_list[[index]]$node.label[index2] == "Root") {
tree_list[[index]]$node.label[index2] <- ""
} else if (as.numeric(tree_list[[index]]$node.label[index2]) < bootstrap) {
tree_list[[index]]$node.label[index2] <- ""
}
}
if (length(tree_list[[index]]$tip.label) > max_tips) {
max_tips <- length(tree_list[[index]]$tip.label)
}
}
# Set sample colours, modifying them if both a colours file is specified AND there is a taxon column in the samples file
if (samples_present) {
sample_table$colour <- rep("black", length(nrow(sample_table)))
if (all(c(colours_present, ncol(sample_table) == 4))) {
for (taxon in unique(sample_table$taxon)) {
if (taxon %in% colour_table$V1) {
colour <- colour_table$V2[match(taxon, colour_table$V1)]
sample_table$colour[sample_table$taxon == taxon] <- colour
}
}
}
}
# Substitute labels and colours if a samples file is present
tip_colours <- vector("list")
if (samples_present) {
for (index in seq_len(length(tree_list))) {
tips <- tree_list[[index]]$tip.label
new_tips <- tips
tip_col <- rep("black", length(tips))
for (tind in seq_len(length(tips))) {
if (tips[tind] %in% sample_table$ID) {
new_tips[tind] <- sample_table$label[match(tips[tind], sample_table$ID)]
tip_col[tind] <- sample_table$colour[match(tips[tind], sample_table$ID)]
}
}
tree_list[[index]]$tip.label <- new_tips
tip_colours[[index]] <- tip_col
}
} else {
for (index in seq_len(length(tree_list))) {
tip_colours[[index]] <- rep("black", length(tree_list[[index]]$tip.label))
}
}
# Plot trees
max_height <- max(c(max_tips / 5, 12))
pdf("trees.pdf", family = "ArialMT", width = (2 * max_height / 3), height = max_height)
cat("Plotting", length(tree_list), "trees to pdf\n")
for (index in seq_len(length(tree_list))) {
if (plot_clado) {
plot.phylo(ladderize(tree_list[[index]], right = FALSE),
no.margin = FALSE,
font = 1,
edge.width = 2,
label.offset = 0.1,
use.edge.length = FALSE,
node.depth = 2,
tip.col = tip_colours[[index]],
main = labels[[index]])
if (is.null(tree_list[[index]]$node.label)) {
cat("No nade labels detected\n")
} else {
drawSupportOnEdges(tree_list[[index]]$node.label,
adj = c(0.5, -0.5),
frame = "none")
}
} else {
plot.phylo(ladderize(tree_list[[index]], right = FALSE),
no.margin = FALSE,
font = 1,
edge.width = 2,
label.offset = max(nodeHeights(tree)) / 200,
tip.col = tip_colours[[index]],
main = labels[[index]])
add.scale.bar(x = mean(par("usr")[1:2]),
y = par("usr")[3] + 1,
font = 1, lwd = 2)
if (is.null(tree_list[[index]]$node.label)) {
cat("No nade labels detected\n")
} else {
drawSupportOnEdges(tree_list[[index]]$node.label,
adj = c(0.5, -0.5),
frame = "none")
}
}
if (write_tips) {
lad_tree <- ladderize(tree_list[[index]], right = FALSE)
# determine which edges are tips and get the order
is_tip <- lad_tree$edge[, 2] <= length(lad_tree$tip.label)
ordered_tips <- lad_tree$edge[is_tip, 2]
# get the tips in order of plotting
output_ordered_ids <- lad_tree$tip.label[ordered_tips]
# write to a file
connection <- file(paste0("tips_", index, ".txt"))
writeLines(output_ordered_ids, connection)
close(connection)
}
}
invisible(dev.off())
if (svg_out) {
for (index in seq_len(length(tree_list))) {
svg(paste0("tree_", index, ".svg"), family = "ArialMT",
width = (2 * max_height / 3), height = max_height)
cat("Plotting Tree", basename(catch_args[[index]]), "to svg\n")
if (plot_clado) {
plot.phylo(ladderize(tree_list[[index]], right = FALSE),
no.margin = FALSE,
font = 1,
edge.width = 2,
label.offset = 0.1,
use.edge.length = FALSE,
node.depth = 2,
tip.col = tip_colours[[index]],
main = labels[[index]])
if (is.null(tree_list[[index]]$node.label)) {
cat("No nade labels detected\n")
} else {
drawSupportOnEdges(tree_list[[index]]$node.label,
adj = c(0.5, -0.5),
frame = "none")
}
} else {
plot.phylo(ladderize(tree_list[[index]], right = FALSE),
no.margin = FALSE,
font = 1,
edge.width = 2,
label.offset = max(nodeHeights(tree)) / 200,
tip.col = tip_colours[[index]],
main = labels[[index]])
add.scale.bar(x = mean(par("usr")[1:2]),
y = par("usr")[3] + 1,
font = 1, lwd = 2)
if (is.null(tree_list[[index]]$node.label)) {
cat("No nade labels detected\n")
} else {
drawSupportOnEdges(tree_list[[index]]$node.label,
adj = c(0.5, -0.5),
frame = "none")
}
}
invisible(dev.off())
}
}