-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain_test_Step1.py
More file actions
94 lines (69 loc) · 2.34 KB
/
main_test_Step1.py
File metadata and controls
94 lines (69 loc) · 2.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
from datetime import datetime
from os import listdir
import gym
import os
import numpy as np
import pickle
from random import randint
from DQNAgent import DQNAgent
from Domain.Action import Action
from Domain.ExperimentType import ExperimentType
from Infrastructure.ExperimentService import ExperimentService
from numpy.random import seed
import tensorflow
seed(10)
import collections
if __name__ == "__main__":
    # --- Environment setup -------------------------------------------------
    # Custom grid-world environment registered by the project under "env:".
    env = gym.make("env:MatrixEnv-v0")
    origin = (0, 0)
    goal = (0, 3)
    # origin = (randint(0, dimension - 1), randint(0, dimension - 1))
    # goal = (randint(0, dimension - 1), randint(0, dimension - 1))
    env.init_variables(4, origin, goal)  # 4x4 grid, fixed origin/goal pair
    print(env.s)
    print("Distance from start to goal is: {}".format(env.distance_from_start_to_goal))

    state_size = env.observation_space.n
    print("State_size: ", state_size)
    action_size = env.action_space.n
    print("Action size: ", action_size)

    # Training hyper-parameters, hoisted from magic numbers in the loops below.
    NUM_EPISODES = 400        # outer training episodes
    MAX_STEPS_PER_EPISODE = 60  # step budget before an episode is abandoned
    REPLAY_MEMORY_MIN = 10    # minimum stored transitions before replaying
    REPLAY_BATCH_SIZE = 5     # minibatch size passed to agent.replay()

    agent = DQNAgent(None, state_size, action_size, False, '')
    solved_eps = 0
    steps_taken_for_completion = []
    episode_rewards = []  # per-episode list of step rewards
    my_actions = []       # every action taken across all episodes

    for episode in range(NUM_EPISODES):
        actions = []
        episode_reward = []
        state = env.reset_action()
        print(state)
        # BUG FIX: the denominator previously printed the literal 1 instead of
        # the total episode count.
        print('episode {}/{}'.format(episode, NUM_EPISODES))
        for step in range(MAX_STEPS_PER_EPISODE):
            action = agent.act(state)
            print("---" * 2)
            print("Action: %s" % Action(action))
            print(state)
            next_state, reward, done = env.step_action(action)
            # Step-1 sanity check: DOWN is treated as terminal, every other
            # action is penalised with a fixed -1 reward.
            if Action(action) == Action.DOWN:
                done = True
            else:
                reward = -1
            # BUG FIX: original `print("Rew: %i", reward)` never applied the
            # %-format — it printed the raw format string and the reward as
            # two separate print arguments.
            print("Rew: %i" % reward)
            print(next_state)
            episode_reward.append(reward)
            # total_reward = agent._predict(state)
            agent.memorize(state, action, reward, next_state, done)
            state = next_state
            actions.append(action)
            my_actions.append(action)
            if done:
                print("Solved - Total Steps: %i" % step)
                print(actions)
                break
            if len(agent.memory) > REPLAY_MEMORY_MIN:
                agent.replay(REPLAY_BATCH_SIZE)
        # NOTE(review): this break stops training after the FIRST episode and
        # makes the append below unreachable; the author flagged it "ALERTA".
        # Kept as-is — remove it to actually train for all NUM_EPISODES.
        break #### ALERTA
        episode_rewards.append(episode_reward)

    print(collections.Counter(my_actions))
    state = env.reset_action()
    total_reward = agent._predict(state)
    print(total_reward)