reversi_main.py
import gym
import random
import numpy as np
from RL_QG_agent_xiao import RL_QG_agent
env = gym.make('Reversi8x8-v0')
env.reset()
agent = RL_QG_agent()
# agent.load_model()  # uncomment to load a previously saved model
max_epochs = 1000
win_cnt = 0
for i_episode in range(max_epochs):
    observation = env.reset()
    # observation is a 3 x 8 x 8 list describing the current board;
    # the exact layout is defined by `state` in reversi.py
    for t in range(100):
        action = [1, 2]
        # action holds two integers: action[0] is the board position to play,
        # action[1] is the stone color (black = 0, white = 1)

        ################### Black (color 0) ###############################
        # Black plays a random legal move
        # env.render()  # print the current board
        enables = env.possible_actions
        if len(enables) == 0:
            action_ = env.board_size**2 + 1  # no legal move: pass
        else:
            action_ = random.choice(enables)
        # action_ = agent.place(observation, enables, player=0)  # use the trained model instead
        action[0] = action_
        action[1] = 0  # black is 0
        observation, reward, done, info = env.step(action)

        ################### White (color 1) ###############################
        # env.render()
        enables = env.possible_actions
        # if there is no legal move, pass
        if len(enables) == 0:
            action_ = env.board_size ** 2 + 1  # pass
        else:
            action_ = agent.place(observation, enables, player=1)  # use the trained model
            # action_ = random.choice(enables)
        action[0] = action_
        action[1] = 1  # white is 1
        observation, reward, done, info = env.step(action)

        if done:  # game over
            print("Episode finished after {} timesteps".format(t + 1))
            print(observation)
            white_score = len(np.where(env.state[1, :, :] == 1)[0])
            black_score = len(np.where(env.state[0, :, :] == 1)[0])
            if black_score < white_score:
                print("White wins!")
                win_cnt += 1
            else:
                print("Black wins!")
            print(black_score)
            break
    print(' ' * 60, 'win_cnt', win_cnt, 'total', i_episode + 1)
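
The script imports RL_QG_agent from RL_QG_agent_xiao, but that module is not shown here. Below is a minimal sketch of the interface this script assumes: a load_model() method and a place(observation, enables, player) method that returns a board position. Only the method names and the place() signature are taken from the script above; the bodies (and the random-move fallback) are placeholders, not the actual trained agent.

# Minimal interface sketch, assuming only what reversi_main.py calls.
import random

class RL_QG_agent:
    def load_model(self):
        # In the real module this would restore trained parameters from disk.
        pass

    def place(self, observation, enables, player=1):
        # observation: 3 x 8 x 8 board state; enables: list of legal positions;
        # player: 0 for black, 1 for white.
        # A trained agent would score each legal move and return the best one;
        # this stand-in simply picks a random legal move.
        return random.choice(enables)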