-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathQ2_1.py
More file actions
39 lines (30 loc) · 987 Bytes
/
Q2_1.py
File metadata and controls
39 lines (30 loc) · 987 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from numpy import load, array
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
def kmeans(data, first_n_genes, clusters):
    """Cluster the leading rows of ``data`` with K-means.

    Rows ``0 .. first_n_genes - 1`` of ``data`` are stacked into a single
    array and fitted with ``KMeans(n_clusters=clusters, random_state=0)``.

    Returns the fitted model's ``labels_``: it has one entry per selected
    row, and entry ``n`` is the cluster assigned to the n-th gene.
    """
    # Rows are picked one index at a time, exactly ``first_n_genes`` of them.
    selected = array([data[row] for row in range(first_n_genes)])
    model = KMeans(n_clusters=clusters, random_state=0)
    model.fit(selected)
    return model.labels_
def plot_kmeans(data, first_n_genes, clusters):
    """Show a histogram of K-means cluster sizes.

    Clusters the first ``first_n_genes`` rows of ``data`` into ``clusters``
    groups via ``kmeans`` and displays how many rows landed in each cluster.

    Returns None; the only effect is the matplotlib figure that is shown.
    """
    groups = kmeans(data, first_n_genes, clusters)
    # Explicit edges at -0.5, 0.5, ..., clusters - 0.5 give every integer
    # cluster index its own unit-wide bar centred on that index. Passing the
    # bare count (``bins=clusters``) instead spreads the bins over
    # [min(label), max(label)], which makes the bars narrower than one unit
    # and lets them drift away from the indices they are counting.
    edges = [index - 0.5 for index in range(clusters + 1)]
    plt.hist(groups, bins=edges)
    plt.xlabel('cluster')
    plt.ylabel('# genes in cluster')
    plt.title('K-means cluster sizes for k=' + str(clusters))
    plt.show()
def q2_1(file, k, first_n_genes=1000):
    """Load sequencing data from an ``.npz`` archive and plot its clustering.

    Args:
        file: Whatever ``numpy.load`` accepts for an ``.npz`` archive; the
            archive must contain a ``'SeqData'`` entry.
        k: Number of K-means clusters to fit.
        first_n_genes: How many leading rows of ``'SeqData'`` to cluster.
            Defaults to 1000, the value that was previously hard-coded.

    Returns None; a cluster-size histogram is displayed via ``plot_kmeans``.
    """
    with load(file) as data:
        # Only 'SeqData' is needed for the plot. The 'Gene_Name' entry used
        # to be read into a local that was never used, so it is skipped.
        seq = data['SeqData']
    # The array has been read out of the archive above, so the file is
    # already closed while the plot window is open.
    plot_kmeans(seq, first_n_genes, k)
if __name__ == '__main__':
    # Script entry point: draw one cluster-size histogram per k, in order.
    file = 'Data1.npz'
    for n_clusters in (10, 20, 50):
        q2_1(file, n_clusters)