Master_Project/LSTM.py at main · saikumar-guru/Master_Project · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 30 21:29:15 2022

@author: saikumar
"""
import pandas as pd
import numpy as np

from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense, Dropout
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import normalize
early_stopping = EarlyStopping()

dataset = pd.read_csv('features/features.csv')
n_steps_in, n_steps_out = 8, 12 # number of input / number of outputs

def MakeInputOutput(dataset):
    # this method will make the base line model dataset which is basically x and y value itself.
    unique_ids = list(np.unique(dataset.id))
    X_values = dataset['x']
    Y_values = dataset['y']
    X_values = np.array(X_values).reshape((len(X_values), 1))
    Y_values = np.array(Y_values).reshape((len(Y_values), 1))
    stack = np.hstack((X_values,Y_values))
    return stack

# split a multivariate sequence into samples
def split_sequences(sequences, n_steps_in, n_steps_out):
    # this function will make the sequence feature and output. in this method I am basically trying to convert time series into supervised structure where the input of first 8 sequence in time series would be the predicted 12 points
	X, y = list(), list()
	for i in range(len(sequences)):
		# find the end of this pattern
		end_ix = i + n_steps_in
		out_end_ix = end_ix + n_steps_out
		# check if we are beyond the dataset
		if out_end_ix > len(sequences):
			break
		# gather input and output parts of the pattern
		seq_x, seq_y = sequences[i:end_ix, :], sequences[end_ix:out_end_ix, :]
		X.append(seq_x)
		y.append(seq_y)
	return np.array(X), np.array(y)

def addNewFeature(X_old, columns, feature_csv):
    # this function i have made in order to add more feature and correspondly train and predict the model
    df = pd.read_csv(feature_csv)
    new_X = [] # current feature
    All_feature = [] # whole feature dataset
    for i in columns:
        new_x = np.array(df[i])
        if sum(np.isnan(new_x))>0:
            new_x = np.nan_to_num(new_x)
        # converting to the n_setpes in features
        for i in range(len(new_x)):
            end_idx = i + n_steps_in
            seqx = new_x[i:end_idx]
            new_X.append(seqx)
    for i in range(len(X_old)):
        x = new_X[i].reshape(n_steps_in,1)
        new_feature_array   = np.append(X_old[i],x,axis=1)
        All_feature.append(new_feature_array)
    print("INFO: allnew features has been added")
    return np.array(All_feature)


stack = MakeInputOutput(dataset)
# normalized the dataset
stack = normalize(stack)
X, y = split_sequences(stack, n_steps_in, n_steps_out)
#X = normalize(X.reshape(X.shape[0], X.shape[1]*X.shape[2])).reshape(X.shape[0], X.shape[1],X.shape[2])

# adding new feature obstED to the base model
#X = addNewFeature(X,['ObstED'],'features/features.csv')
X_train,y_train = X[:],y[:]
X_valid,y_valid = X[:],y[:]
# the dataset knows the number of features, e.g. 2
n_features = X.shape[2]

# define multi-step encoder decoder model
def buildModel():
    model = Sequential(name="lstm_model")
    model.add(LSTM(400, activation='relu', input_shape=(n_steps_in, n_features)))
    model.add(RepeatVector(n_steps_out))
    model.add(LSTM(200, activation='relu', return_sequences=True))
    model.add(TimeDistributed(Dense(2))) # output layer
    #model.add(Bidirectional(LSTM(50, activation='relu'), input_shape=(n_steps_in, n_features)))
    #model.add(Dense(n_features))
    model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['MeanSquaredError', 'MeanAbsoluteError','accuracy'],
              loss_weights=None,
              weighted_metrics=None,
              run_eagerly=None,
              steps_per_execution=None
             )
    return model

def predict(model, x, n_features):
    x = x.reshape(1,8,n_features)
    y_hat = model.predict(x)
    print(y_hat)
    return y_hat

# fit model
if __name__ == "__main__":
    model = buildModel()
    history1 = model.fit(X_train, y_train, epochs=300, verbose=1, callbacks=[early_stopping], validation_data=(X_valid,y_valid))
    model.save('MultiEndec_model')
    try:
        MultiEndec_model = open('MultiEndec_model.txt', 'wt')
        MultiEndec_model.write(str(history1.history))
        MultiEndec_model.close()
        print('MultiEndec_model  summary has been created successfully')
    except:
        print("Unable to write to file")