-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathEmbeddingModel.py
More file actions
216 lines (188 loc) · 10.5 KB
/
EmbeddingModel.py
File metadata and controls
216 lines (188 loc) · 10.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
'''
Embedding Model class
- Main variables:
self.model - keras model
self.num_classes - number of outputs/classes
self.class_weights - class weights got from get_class_weights()
used in weighted_cross_entropy
(if using imblearn this becomes simple cross_entropy)
self.num_sites - number of sites from X w/ shape (num_examples, num_error, num_sites)
self.num_error - number of errors from X w/ shape (num_examples, num_error, num_sites)
self.num_embed - number of created embeddings for each site or error
self.embed_epochs - number of epochs to train embeddings
self.add_attention - whether to add [0,1] attention outputs for each site and error,
showing where the model's attention was mostly focused
self.sites_attention_model - model that returns sites attention. returns shape (num_sites,)
self.error_attention_model - model that returns error attention. returns shape (num_error,)
self.model_params - self.create_model() parameters, element names must be the same !
- Main functions:
self.create_model - creates self.model
self.train - trains self.model w/ selected parameters
self.predict - predicts Y from X with self.model
self.change_inputs - function that changes inputs before feeding to model
self.load_model - loads model.h5 file from directory (dirpath)
self.save_model - saves model.h5 and model.json to directory (dirpath)
self.pretraining - creates embeddings
'''
import keras
from keras.layers import Input, Embedding, dot, Flatten, Dense, Dropout, Concatenate, Reshape, multiply
import numpy as np
from skopt.space import Real, Categorical, Integer
from utils.BaseModel import Model as BaseModel
from utils.losses import weighted_categorical_crossentropy
from utils.model_utils import get_class_weights
class EmbeddingModel(BaseModel):
    ''' Feed-forward classifier that works on pretrained site / error embeddings.

    On construction the embeddings are trained from X (see self.pretraining()).
    self.change_inputs() then converts raw (num_examples, num_error, num_sites)
    arrays into embedding-weighted inputs for the network built by
    self.create_model().
    '''

    def __init__(self, X, num_classes, num_sites=142, num_error=54, num_embed=20,
                 embedding_training_epochs=180, add_attention=False):
        '''
        @param X w/ shape (num_examples, num_error, num_sites)
        @param num_classes - number of outputs/classes
        @param num_sites - number of sites from X w/ shape (num_examples, num_error, num_sites)
        @param num_error - number of errors from X w/ shape (num_examples, num_error, num_sites)
        @param num_embed - length of the embedding vector created for each site or error
        @param embedding_training_epochs - number of epochs used by self.pretraining()
        @param add_attention - whether to add [0,1] attention outputs for each site and
                               error, showing where the model's attention was mostly focused
        '''
        self.num_sites = num_sites
        self.num_error = num_error
        self.num_embed = num_embed
        self.num_classes = num_classes
        self.embed_epochs = embedding_training_epochs
        self.add_attention = add_attention

        # Embeddings are trained right away, so constructing the object fits a small model.
        self.pretraining(X)

        # Default arguments for self.create_model(); keys must match its parameter names.
        self.model_params = {
            'dense_layers': 3,
            'dense_units': 50,
            'dropout_value': 0.2,
            'learning_rate': 1e-3,
        }

    def create_model(self, dense_layers, dense_units, dropout_value, learning_rate):
        ''' creates feed forward neural network w/ inputs got from self.change_inputs()
            and stores it in self.model.
            if self.add_attention == True:
                also creates sites attention and error attention models
                (self.sites_attention_model, self.error_attention_model)

            NOTE(review): the loss reads self.class_weights, which is not assigned
            anywhere in this class - presumably set by BaseModel or the caller
            before this method runs; confirm.

        @param dense_layers - number of hidden Dense layers
        @param dense_units - number of units in each hidden Dense layer
        @param dropout_value - dropout rate applied after each hidden Dense layer
        @param learning_rate - learning rate of the Adam optimizer
        '''
        m_input = Input((self.num_sites + self.num_error, self.num_embed))
        m = m_input

        if self.add_attention:
            m_a = Flatten()(m)
            m_a_sites = Dense(units=self.num_sites, activation='softmax', name='sites_attention')(m_a)
            m_a_error = Dense(units=self.num_error, activation='softmax', name='error_attention')(m_a)
            # Sites first, then errors: same row order as the output of self.change_inputs().
            m_a = Concatenate(axis=1)([m_a_sites, m_a_error])
            m_a = Reshape([self.num_sites + self.num_error, 1])(m_a)
            # One attention weight per row, broadcast over the embedding axis.
            m = multiply([m, m_a])

        m = Flatten()(m)
        for _ in range(dense_layers):
            m = Dense(units=dense_units, activation='relu')(m)
            m = Dropout(dropout_value)(m)
        m_output = Dense(self.num_classes, activation='softmax')(m)

        self.model = keras.models.Model(inputs=m_input, outputs=m_output)
        self.model.compile(loss=weighted_categorical_crossentropy(self.class_weights),
                           optimizer=keras.optimizers.Adam(lr=learning_rate))

        if self.add_attention:
            # Side models sharing the main model's input, exposing the attention layers.
            self.sites_attention_model = keras.models.Model(
                inputs=self.model.input,
                outputs=self.model.get_layer(name='sites_attention').output
            )
            self.error_attention_model = keras.models.Model(
                inputs=self.model.input,
                outputs=self.model.get_layer(name='error_attention').output
            )

    def pretraining(self, X):
        ''' creates error and sites embeddings from matrix* (num_error, num_sites):
            - self.error_embedding w/ shape (num_error, num_embed)
            - self.sites_embedding w/ shape (num_sites, num_embed)
            * this matrix is from X w/ shape (num_examples, num_error, num_sites)
              where everything is summed across 1st axis to (num_error, num_sites)
              and all numbers that are greater than 0 are replaced w/ 1

            The embeddings are trained so that the dot product of an error vector
            and a site vector approximates that matrix (mse loss).

        @param X w/ shape (num_examples, num_error, num_sites)
        '''
        sites_input = Input((self.num_sites,), name='sites_input')  # (batch, num_sites)
        sites_embed = Embedding(self.num_sites, self.num_embed,
                                name='sites_embed')(sites_input)    # (batch, num_sites, num_embed)
        error_input = Input((self.num_error,), name='error_input')  # (batch, num_error)
        error_embed = Embedding(self.num_error, self.num_embed,
                                name='error_embed')(error_input)    # (batch, num_error, num_embed)
        modl_output = dot(inputs=[error_embed, sites_embed], axes=2)  # (batch, num_error, num_sites)

        model = keras.models.Model([error_input, sites_input], modl_output)
        model.compile(loss='mse', optimizer=keras.optimizers.Adam(lr=1e-2))

        # A single training example: every error index and every site index, in order.
        a = np.arange(self.num_error).reshape((1, self.num_error))
        b = np.arange(self.num_sites).reshape((1, self.num_sites))

        # np.sum returns a new array, so the in-place thresholding below does not touch X.
        targets = np.expand_dims(np.sum(X, axis=0), axis=0)
        targets[targets > 0] = 1

        model.fit(x=[a, b], y=targets, epochs=self.embed_epochs, verbose=0)

        self.error_embedding = model.get_layer(name='error_embed').get_weights()[0]  # (num_error, num_embed)
        self.sites_embedding = model.get_layer(name='sites_embed').get_weights()[0]  # (num_sites, num_embed)

    def change_inputs(self, X, Y=None):
        ''' create input from X w/ shape (num_examples, num_error, num_sites)
            to X w/ (num_examples, num_sites + num_error, num_embed) where:
            1) each error index is converted to (num_embed,) vector from self.pretraining()
               and is multiplied by sum of this error at all sites
               (this happens at each example separately)
            2) each site index is converted to (num_embed,) vector from self.pretraining()
               and is multiplied by sum of all errors at this site
               (this happens at each example separately)
            3) matrix from 2) w/ shape (num_examples, num_sites, num_embed)
               and 1) w/ shape (num_examples, num_error, num_embed)
               are concatenated, sites first, to shape
               (num_examples, num_sites + num_error, num_embed)

            The computation is done with NumPy broadcasting instead of per-example
            Python loops; an X with zero examples (but still 3 dimensions) yields
            an empty array of the right shape.

        @param X w/ shape (num_examples, num_error, num_sites)
        @param Y w/ shape (num_examples,) - class indices, optional
        return X_new w/ shape (num_examples, num_sites + num_error, num_embed)
               or (X_new, Y_one_hot) if Y is given
        '''
        X = np.asarray(X)

        # Per-example totals; only the first num_error / num_sites entries are used.
        error_totals = X.sum(axis=2)[:, :self.num_error]  # (num_examples, num_error)
        sites_totals = X.sum(axis=1)[:, :self.num_sites]  # (num_examples, num_sites)

        error_vectors = np.asarray(self.error_embedding)[:self.num_error]
        sites_vectors = np.asarray(self.sites_embedding)[:self.num_sites]

        # Scale every embedding vector by its total, for all examples at once.
        X_error = error_totals[:, :, np.newaxis] * error_vectors[np.newaxis, :, :]
        X_sites = sites_totals[:, :, np.newaxis] * sites_vectors[np.newaxis, :, :]

        X_new = np.concatenate((X_sites, X_error), axis=1)

        if Y is None:
            return X_new
        Y = keras.utils.to_categorical(Y, num_classes=self.num_classes)
        return X_new, Y

    def predict(self, X, argmax=True):
        '''
        @param X w/ shape (num_examples, num_error, num_sites)
        @param argmax - if True return class indices, otherwise the raw model output
                        w/ shape (num_examples, num_classes)
        return y_pred np.array w/ shape (num_examples,), where each number represents class index
        or
        if self.add_attention == True:
            return y_pred as above and
                sites_att_output np.array w/ shape (num_examples, num_sites), where each
                    number [0,1] represents how much attention was used on that site
                error_att_output np.array w/ shape (num_examples, num_error), where each
                    number [0,1] represents how much attention was used on that error
        '''
        X = self.change_inputs(X)
        y_pred = self.model.predict(X)  # (num_examples, num_classes)
        if argmax:
            y_pred = np.argmax(y_pred, axis=-1)  # (num_examples,)

        if not self.add_attention:
            return y_pred

        sites_att_output = self.sites_attention_model.predict(X)  # (num_examples, num_sites)
        error_att_output = self.error_attention_model.predict(X)  # (num_examples, num_error)
        return y_pred, sites_att_output, error_att_output

    def set_skopt_dimensions(self):
        ''' initializes self.dimensions list (skopt search space)
            !!! order of elements must be the same as self.create_model() params !!!
            !!! name fields must be the same as keys in self.model_params dict !!!
        '''
        self.dimensions = [
            Integer(low=1, high=15, name='dense_layers'),
            Integer(low=5, high=75, name='dense_units'),
            Real(low=0.01, high=0.5, name='dropout_value'),
            Real(low=1e-6, high=1e-2, prior='log-uniform', name='learning_rate'),
        ]
#