# InverseRL/simulator.py at master · svikramank/InverseRL · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import numpy as np
import pandas as pd
import daytime
import re
from scipy.stats.stats import pearsonr
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import timeit
import pickle
import scipy
from cvxopt import matrix, solvers
from keras.models import Sequential
from keras.layers import Dense
from keras import losses
from keras.optimizers import Adam
import tensorflow as tf


####################################################################################################################################
#################### WRITE A DUMMY SIMULATOR #######################################################################################
####################################################################################################################################

# Number of training epochs passed to model.fit() when training the simulator network.
NUM_OF_EPOCHS = 50

def simulator(newdf):
    """Train a neural transition model that predicts s' from (s, a).

    The simulator takes a state ``s`` and an action ``a`` as input and outputs
    the next state ``s'``, i.e. the state reached if ``a`` is taken in ``s``.
    Each row's next state is taken to be the state stored in the following
    row, so the rows must already be in trajectory order.

    Args:
        newdf: pandas DataFrame whose column 0 holds the state as a
            fixed-length sequence of numbers and whose column 1 holds a
            scalar action. (The remaining column is presumably the reward;
            it is not used here.) Index labels may be arbitrary but should be
            unique, because the last row is dropped by label.

    Returns:
        The fitted Keras ``Sequential`` model mapping ``state + [action]`` to
        the next state.

    Raises:
        ValueError: if ``newdf`` has fewer than two rows, since no
            (s, a, s') transition can be formed.

    Side effects:
        * ``newdf`` is modified in place: its last row is dropped (it has no
          successor) and a ``'next_state'`` column is added.
        * The trained model is written to ``transition_model.h5`` in the
          current working directory.
    """
    print("Creating the (s, a, r, s') pairs...")
    if len(newdf) < 2:
        # Fail before touching the caller's frame instead of half-mutating it.
        raise ValueError(
            "simulator() needs at least two rows to build (s, a, r, s') transitions"
        )

    # Next state of row i is the state of row i+1. Slice by position so this
    # works for any index (filtered frames, non-integer labels), not only a
    # default RangeIndex.
    next_states = newdf.iloc[1:, 0].tolist()

    # The final row has no successor, so it cannot be a training sample.
    newdf.drop(newdf.tail(1).index, inplace=True)
    newdf['next_state'] = next_states

    print("Printing the new dataframe with (s, a, r, s') trajectories...")
    print("----------")
    print(newdf.columns)

    # Creating the training data: one input vector per row = state followed
    # by the action; the target is the next state.
    features = [
        list(state) + [action]
        for state, action in zip(newdf.iloc[:, 0], newdf.iloc[:, 1])
    ]
    # Refer to the target by name rather than by a hard-coded position so the
    # right column is used regardless of how many columns newdf came with.
    targets = newdf['next_state'].tolist()

    X_stack = np.stack(features, axis=0)
    y_stack = np.stack(targets, axis=0)

    state_dim = len(newdf.iloc[0, 0])

    print("Creating the model for simulator...")
    model = Sequential()
    # Input is the state vector plus one extra slot for the action.
    model.add(Dense(30, input_dim=state_dim + 1, activation='relu'))
    model.add(Dense(20, activation='relu'))
    # Linear output layer: one unit per state dimension (regression).
    model.add(Dense(state_dim))
    # NOTE(review): newer Keras releases name this argument `learning_rate`;
    # `lr` is kept for compatibility with the Keras version this file targets.
    model.compile(loss='logcosh', optimizer=Adam(lr=0.0001))

    print("Started training...")
    model.fit(X_stack, y_stack, epochs=NUM_OF_EPOCHS, verbose=1, batch_size=64)
    print("Simulator trained to predict s' from (s,a)...")

    model.save("transition_model.h5")

    return model