-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun.py
More file actions
executable file
·41 lines (25 loc) · 1.19 KB
/
run.py
File metadata and controls
executable file
·41 lines (25 loc) · 1.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import numpy as np
import matplotlib.pyplot as plt
import codecs  # retained from the original script; reading now uses the built-in open()

# Input/output locations and plot settings used by this script.
MODEL_PATH = 'model.vec'
OUTPUT_PATH = 'viz.jpg'
# Number of leading lines of the .vec file to ignore. The script has always
# skipped the first 10 entries because they might be garbage values; in a
# fastText .vec file the first line is normally a "<count> <dim>" header,
# which this skip also covers.
SKIP_LEADING_LINES = 10
FIGURE_SIZE_INCHES = (18.5, 10.5)
AXIS_LIMITS = (-0.5, 0.5)


def load_embeddings(path=MODEL_PATH, skip=SKIP_LEADING_LINES):
    """Read words and their vectors from a text-format embedding file.

    Each line is expected to look like ``word v1 v2 ... vN`` with single
    spaces between the fields. Blank lines and lines that carry no vector
    values are ignored.

    Args:
        path: Path of the UTF-8 encoded ``.vec`` file.
        skip: Number of lines at the start of the file to ignore.

    Returns:
        A ``(words, vecs)`` pair of equally long lists: the words as strings
        and the vectors as float32 NumPy arrays.
    """
    words = []
    vecs = []
    # The built-in open() breaks lines only on "\n", "\r" and "\r\n".
    # codecs.open() additionally breaks on characters such as U+2028 or
    # U+0085, which cuts a line in half when one occurs inside a word.
    with open(path, 'r', encoding='utf-8') as f_in:
        for line_no, line in enumerate(f_in):
            if line_no < skip:
                continue
            # Only trailing whitespace is removed, so a word that itself
            # consists of an unusual whitespace character is not lost.
            word, sep, values = line.rstrip().partition(' ')
            if not word or not sep:
                # Blank line, or a word without any vector values.
                continue
            words.append(word)
            # Text-mode np.fromstring parses the space separated numbers.
            vecs.append(np.fromstring(values, dtype='float32', sep=' '))
    return words, vecs


def project_to_2d(vecs):
    """Project the vectors to 2-D with a Singular Value Decomposition.

    Args:
        vecs: Sequence of equally long float vectors, one per word.

    Returns:
        Array of shape ``(len(vecs), 2)`` holding the first two columns of
        ``U`` from ``np.linalg.svd(vecs, full_matrices=False)``.

    Raises:
        ValueError: If there are fewer than two vectors or fewer than two
            dimensions, because two singular directions are then unavailable.
    """
    matrix = np.asarray(vecs, dtype='float32')
    if matrix.ndim != 2 or min(matrix.shape) < 2:
        raise ValueError(
            'need at least two word vectors with at least two dimensions, '
            'got array of shape %r' % (matrix.shape,)
        )
    U, _s, _Vh = np.linalg.svd(matrix, full_matrices=False)
    return U[:, :2]


def plot_words(words, coords, output_path=OUTPUT_PATH):
    """Draw every word at its 2-D coordinate and save the figure.

    Args:
        words: Words to draw.
        coords: One ``(x, y)`` pair per word, in the same order as ``words``.
        output_path: File the figure is written to.
    """
    # Figure size and axis limits do not depend on the word, so they are set
    # once instead of once per plotted word.
    fig = plt.gcf()
    fig.set_size_inches(*FIGURE_SIZE_INCHES)
    for word, (x, y) in zip(words, coords):
        plt.text(x, y, word)
    plt.xlim(AXIS_LIMITS)
    plt.ylim(AXIS_LIMITS)
    plt.savefig(output_path)


def main():
    """Load the embeddings, reduce them to 2-D and write the plot."""
    words, vecs = load_embeddings()
    coords = project_to_2d(vecs)
    plot_words(words, coords)


if __name__ == '__main__':
    main()