-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathModel.py
More file actions
120 lines (89 loc) · 3.28 KB
/
Model.py
File metadata and controls
120 lines (89 loc) · 3.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import pickle
import tensorflow as tf
print(tf.__version__)
from tensorflow.keras.models import Sequential
import re
import numpy as np
import nltk
from nltk.stem import wordnet, WordNetLemmatizer
from nltk.corpus import stopwords
nltk.download("wordnet")
nltk.download("stopwords")
# Emotion labels; the order must match the model's softmax output indices
# (MakePred pairs emotions[i] with prediction[i]).
emotions = ["anger", "fear", "joy", "love", "sadness", "surprise"]
def GetStopwords():
    """Return NLTK's English stopword list with "not" retained.

    Negation carries emotional meaning, so "not" must survive
    stopword removal.
    """
    stopword_list = list(stopwords.words("english"))
    stopword_list.remove("not")
    return stopword_list
# Shared stopword list used by CleanFeatures ("not" is kept — see GetStopwords).
EnglishStopwords = GetStopwords()
# Keras tokenizer fitted elsewhere and serialized to tokenizer.pkl.
# NOTE(review): pickle.loads executes arbitrary code from the file —
# only load a trusted, locally produced artifact.
tokenizer = None
with open("tokenizer.pkl", "rb") as f:
    tokenizer = pickle.loads(f.read())
# Vocabulary size for the Embedding layer (+1 because index 0 is reserved
# for padding and tokenizer indices start at 1).
num_words = len(tokenizer.index_word) + 1
def CleanFeatures(features):
    '''
    takes 2D numpy array of text data and
    removes stopwords, non-alphanumeric characters,
    trailing whitespaces, and applies lemmatization

    Returns a list of cleaned sentence strings, one per input cell.
    '''
    lemma = WordNetLemmatizer()
    # Fix: the original rebuilt set(EnglishStopwords) for EVERY word of
    # EVERY sentence inside the comprehension; build it once up front.
    stop_words = set(EnglishStopwords)
    # Compile the pattern once instead of re-parsing it per sentence.
    non_alpha = re.compile(r"[^a-zA-Z]")
    cleaned = []
    for sentence in features.flatten():
        sentence = non_alpha.sub(" ", sentence).lower()
        words = [lemma.lemmatize(word)
                 for word in sentence.split()
                 if word not in stop_words]
        cleaned.append(" ".join(words))
    return cleaned
def TokenizeTestData(testData, tokenizerObject):
    '''
    testData: 1D array of sentences
    tokenizerObject: fitted Keras tokenizer
    returns 2D int32 array of token-id sequences,
    each row padded/truncated to length 150
    '''
    sequences = tokenizerObject.texts_to_sequences(testData)
    padded = tf.keras.preprocessing.sequence.pad_sequences(
        sequences, maxlen=150, dtype="int32")
    return padded
@tf.autograph.experimental.do_not_convert
def CreateModel():
    """Build and compile the full-size 6-class emotion classifier.

    Embedding(480) -> Flatten -> Dense(128) -> Dropout -> Dense(64)
    -> softmax over the 6 emotion labels.
    """
    layers = tf.keras.layers
    model = Sequential([
        layers.Embedding(num_words, 480, input_length=150),
        layers.Flatten(),
        layers.Dense(units=128, activation="relu"),
        layers.Dropout(rate=0.15),
        layers.Dense(units=64, activation="relu"),
        layers.Dense(units=6, activation="softmax"),
    ])
    model.compile(optimizer="adam",
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model
@tf.autograph.experimental.do_not_convert
def CreateMinifiedModel():
    """Build and compile the smaller emotion classifier.

    Same layout as CreateModel but with a 240-dim embedding and
    64/48-unit dense layers; used for inference below.
    """
    layers = tf.keras.layers
    model = Sequential([
        layers.Embedding(num_words, 240, input_length=150),
        layers.Flatten(),
        layers.Dense(units=64, activation="relu"),
        layers.Dropout(rate=0.15),
        layers.Dense(units=48, activation="relu"),
        layers.Dense(units=6, activation="softmax"),
    ])
    model.compile(optimizer="adam",
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model
# Instantiate the smaller architecture for inference and restore the
# pre-trained weights from the "test/test" checkpoint prefix.
model = CreateMinifiedModel()
model.load_weights("test/test")
def replace_not_alphabetical_chars(s: str) -> str:
    """Replace every character outside a-z/A-Z with a single space."""
    cleaned = re.sub(r"[^a-zA-Z]", " ", s)
    return cleaned
def SplitSentence(s: str):
    """Lowercase *s* and split it on whitespace; returns a token list."""
    return s.lower().split()
def softmax(x):
    """Numerically stable softmax over a 1D numpy array.

    Bug fix: the original divided the max-shifted numerator
    exp(x - max(x)) by the UN-shifted denominator sum(exp(x)), so the
    output was scaled by exp(-max(x)) and only summed to 1 when
    max(x) == 0. Numerator and denominator must share the same shift.
    """
    shifted = np.exp(x - np.max(x))
    return shifted / np.sum(shifted)
def MakePred(s):
    """Predict emotion probabilities for a single raw sentence.

    s: input string.
    Returns a list of dicts, one per label in `emotions` (in order):
    [{"name": <emotion>, "percentage": <float probability>}, ...].
    """
    cleaned = CleanFeatures(np.array([[s]]))
    padded = TokenizeTestData(cleaned, tokenizer)
    probabilities = model.predict(padded)[0]
    # Pair labels with scores via zip instead of a hard-coded range(6),
    # so the mapping cannot drift from the `emotions` list.
    return [{"name": name, "percentage": float(p)}
            for name, p in zip(emotions, probabilities)]
if __name__ == "__main__":
    # Smoke test: print the per-emotion scores for a sample sentence.
    predictions = MakePred("Today was an excellent day!")
    for entry in predictions:
        print(entry)