forked from josephwon0310/Launchpad
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathword2vec.py
More file actions
31 lines (25 loc) · 812 Bytes
/
word2vec.py
File metadata and controls
31 lines (25 loc) · 812 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
try:
    from collections.abc import Iterable
except ImportError:  # older interpreters without collections.abc
    from collections import Iterable

import gensim
import numpy as np

import parser
def load(path='./models/GoogleNews-vectors-negative300.bin', binary=True, limit=500000):
    """Load word2vec-format vectors into the module-level ``model``.

    Called with no arguments this behaves exactly as before: it reads the
    GoogleNews binary file under ``./models`` with ``limit=500000``.

    Args:
        path: Location of the word2vec-format vector file.
        binary: Forwarded to gensim's ``load_word2vec_format`` as ``binary``.
        limit: Forwarded to gensim's ``load_word2vec_format`` as ``limit``.

    Returns:
        None. The loaded vectors are stored in the global ``model``, which
        the other functions in this module read.
    """
    global model
    model = gensim.models.KeyedVectors.load_word2vec_format(
        path, binary=binary, limit=limit
    )
def wmdistance(a, b):
    """Return ``model.wmdistance(a, b)`` for the currently loaded model.

    This is a thin pass-through to the module-level ``model``; ``load()``
    must have been called beforehand so that ``model`` exists.
    """
    distance = model.wmdistance(a, b)
    return distance
def is_valid(x):
    """Report whether ``x`` is, or contains, a word known to the loaded model.

    Args:
        x: A single word (``str``) or an iterable of such values (iterables
            may be nested).

    Returns:
        bool: For a string, whether it is present in the model's vectors.
        For any other iterable, ``True`` if at least one element is valid
        (so an empty iterable yields ``False``). For every other type,
        ``False``.
    """
    if isinstance(x, str):
        # Full models expose their vectors as ``.wv``; a bare KeyedVectors
        # object (what ``load()`` creates) may not have that attribute, so
        # fall back to the object itself.
        vectors = getattr(model, "wv", model)
        return x in vectors
    if isinstance(x, Iterable):
        return any(is_valid(item) for item in x)
    # Previously this returned the NotImplementedError class itself, which is
    # truthy and therefore made unsupported values look valid.
    return False
def word_vector(word):
    """Return the vector stored for ``word`` in the loaded model.

    Args:
        word: Key to look up in the model's vectors.

    Returns:
        Whatever the underlying vectors object returns for ``word``.

    Any lookup error raised by the underlying object (for example for an
    unknown word) propagates to the caller; use ``is_valid`` to check first.
    """
    # Full models expose their vectors as ``.wv``; a bare KeyedVectors object
    # (what ``load()`` creates) may not have that attribute, so fall back to
    # the object itself.
    vectors = getattr(model, "wv", model)
    return vectors[word]
def sentence_vector(words):
    """Return the element-wise sum of the vectors of all valid words.

    Args:
        words: Either a string, which is first converted to words with
            ``parser.line_to_words``, or an iterable of words.

    Returns:
        A NumPy array holding the sum of ``word_vector(word)`` over every
        word for which ``is_valid(word)`` is true, or ``None`` when no word
        qualifies (including empty input).
    """
    if isinstance(words, str):
        words = parser.line_to_words(words)
    vectors = [word_vector(word) for word in words if is_valid(word)]
    if not vectors:
        return None
    # Sum across words in a single vectorized call instead of a Python-level
    # loop over every vector component.
    return np.sum(vectors, axis=0)