-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathupgma.py
More file actions
79 lines (60 loc) · 2.16 KB
/
upgma.py
File metadata and controls
79 lines (60 loc) · 2.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
def lowest_cell(table):
# Set default to infinity
min_cell = float("inf")
x, y = -1, -1
# Go through every cell, looking for the lowest
for i in range(len(table)):
for j in range(len(table[i])):
if table[i][j] < min_cell:
min_cell = table[i][j]
x, y = i, j
# Return the x, y co-ordinate of cell
return x, y
# join_labels:
# Combines two labels in a list of labels
def join_labels(labels, a, b):
# Swap if the indices are not ordered
if b < a:
a, b = b, a
# Join the labels in the first index
labels[a] = "(" + labels[a] + "," + labels[b] + ")"
# Remove the (now redundant) label in the second index
del labels[b]
# join_table:
# Joins the entries of a table on the cell (a, b) by averaging their data entries
def join_table(table, a, b):
# Swap if the indices are not ordered
if b < a:
a, b = b, a
# For the lower index, reconstruct the entire row (A, i), where i < A
row = []
for i in range(0, a):
row.append((table[a][i] + table[b][i])/2)
table[a] = row
# Then, reconstruct the entire column (i, A), where i > A
# Note: Since the matrix is lower triangular, row b only contains values for indices < b
for i in range(a+1, b):
table[i][a] = (table[i][a]+table[b][i])/2
# We get the rest of the values from row i
for i in range(b+1, len(table)):
table[i][a] = (table[i][a]+table[i][b])/2
# Remove the (now redundant) second index column entry
del table[i][b]
# Remove the (now redundant) second index row
del table[b]
# UPGMA:
# Runs the UPGMA algorithm on a labelled table
def UPGMA(table, labels):
# Until all labels have been joined...
table=table.tolist()
for i in range(len(table)):
table[i]=[mm for mm in table[i] if mm!=0]
while len(labels) > 1:
# Locate lowest cell in the table
x, y = lowest_cell(table)
# Join the table on the cell co-ordinates
join_table(table, x, y)
# Update the labels accordingly
join_labels(labels, x, y)
# Return the final label
return labels[0]