-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathFrozenLake.py
More file actions
53 lines (37 loc) · 1 KB
/
FrozenLake.py
File metadata and controls
53 lines (37 loc) · 1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import os
import sys
import time

import gym
import numpy as np
# Tabular Q-learning on the 8x8 FrozenLake environment.
#
# Ordinary episodes pick actions uniformly at random.  Every RENDER_EVERY-th
# episode is rendered step by step; from the second such episode onward it
# also follows the greedy policy of the current Q-table, slowed to one step
# per second so it can be watched.
#
# NOTE(review): this relies on the classic gym API (reset() returning the
# observation, step() returning a 4-tuple) -- confirm the installed gym
# version still provides it.

EPISODES = 10000000    # total number of training episodes
MAX_STEPS = 200        # hard cap on steps per episode
RENDER_EVERY = 100000  # render/print one episode out of this many

# os.system('cls') only exists on Windows; use 'clear' elsewhere.
CLEAR_CMD = 'cls' if os.name == 'nt' else 'clear'

env = gym.make('FrozenLake8x8-v0')

# Print the whole Q-table without wrapping or summarisation.  A NaN threshold
# raises ValueError on current NumPy; sys.maxsize is the documented value for
# untruncated output, and linewidth is documented as an int.
np.set_printoptions(
    linewidth=sys.maxsize,
    threshold=sys.maxsize,
    formatter={'float_kind': lambda x: "%6.4f" % x},
)

# Q[s, a]: one row per state, one column per action.
Q = np.zeros((env.observation_space.n, env.action_space.n))
alpha = 0.01   # learning rate
gamma = 0.990  # decay factor

for ep in range(EPISODES):
    o = env.reset()

    # Decide once per episode whether it is shown and whether it is greedy.
    show = ep % RENDER_EVERY == 0
    # Episode 0 is shown but stays random -- presumably because the table is
    # still freshly zero-initialised and argmax would always pick action 0.
    greedy = show and ep > 0

    for t in range(MAX_STEPS):
        s0 = o
        if greedy:
            a = np.argmax(Q[s0])
            time.sleep(1)  # slow the demo episode down to watchable speed
        else:
            a = env.action_space.sample()

        o, r, d, i = env.step(a)

        if show:
            os.system(CLEAR_CMD)
            print(ep, t)
            env.render()

        # Q-learning update: move Q[s0, a] toward r + gamma * max_a' Q[s', a'].
        Q[s0, a] += alpha * (r + gamma * np.max(Q[o]) - Q[s0, a])

        if show:
            print("a:{}, o:{}, r:{}, d:{}, i:{}".format(a, o, r, d, i))
            print(Q)

        if d:
            break