Changes from all commits
93 commits
f40e653
x
1itttlesheep Dec 8, 2021
9c7d638
Changes
Nyrus-Y Dec 11, 2021
045bf7e
szj pull
1itttlesheep Dec 12, 2021
981cfb2
add reward
1itttlesheep Dec 12, 2021
4f30827
Merge remote-tracking branch 'origin/szjdev' into szjdev
oxFFFF-Q Dec 12, 2021
b443c9e
update_reward
oxFFFF-Q Dec 13, 2021
b625537
winrate
1itttlesheep Dec 13, 2021
f9d3121
merge
1itttlesheep Dec 13, 2021
35b58f2
winrate
1itttlesheep Dec 13, 2021
4d2b88f
r_kick
1itttlesheep Dec 13, 2021
c3c601e
s
1itttlesheep Dec 13, 2021
e3ec287
update_reward
oxFFFF-Q Dec 14, 2021
8050a2b
save model
1itttlesheep Dec 14, 2021
3ed3d6d
merge
1itttlesheep Dec 14, 2021
1dc46da
merge
1itttlesheep Dec 14, 2021
d1993ec
Changes
Nyrus-Y Dec 14, 2021
cd69c5c
update_reward
oxFFFF-Q Dec 15, 2021
0733cd5
save optimizer
1itttlesheep Dec 15, 2021
51653b7
Changes
Nyrus-Y Dec 15, 2021
49e8aba
Merge remote-tracking branch 'origin/szjdev' into szjdev
Nyrus-Y Dec 15, 2021
e9a12f7
update_reward
oxFFFF-Q Dec 16, 2021
49c0611
Merge remote-tracking branch 'origin/szjdev' into szjdev
oxFFFF-Q Dec 30, 2021
915614a
update_reward
oxFFFF-Q Jan 3, 2022
72eefa2
Changes
Nyrus-Y Jan 3, 2022
c6fa3c7
Changes
Nyrus-Y Jan 3, 2022
912f08b
Changes
Nyrus-Y Jan 3, 2022
e23a7c5
multi
1itttlesheep Jan 4, 2022
fcea481
update_reward
oxFFFF-Q Jan 4, 2022
11c3d71
action select
1itttlesheep Jan 5, 2022
8585b9a
action select
1itttlesheep Jan 5, 2022
a8f690f
s
1itttlesheep Jan 5, 2022
3faf673
update
oxFFFF-Q Jan 6, 2022
4eb738c
add reward for die
1itttlesheep Jan 6, 2022
9b80c87
s
1itttlesheep Jan 6, 2022
96f517b
update
oxFFFF-Q Jan 6, 2022
28002c4
Changes
Nyrus-Y Jan 6, 2022
a443e25
update
oxFFFF-Q Jan 7, 2022
0a85062
update
oxFFFF-Q Jan 7, 2022
61527a6
szj
1itttlesheep Jan 7, 2022
7d8f839
update
oxFFFF-Q Jan 7, 2022
caf9f6c
update
oxFFFF-Q Jan 7, 2022
2947cc6
Changes
Nyrus-Y Jan 12, 2022
23c58dd
Changes
Nyrus-Y Jan 12, 2022
fbafe64
Changes
Nyrus-Y Jan 12, 2022
ec924be
Changes
Nyrus-Y Jan 13, 2022
e426d18
Changes
Nyrus-Y Jan 13, 2022
ff37460
reward
1itttlesheep Jan 16, 2022
5291d5a
Changes
Nyrus-Y Jan 17, 2022
790c3e7
Delete DQNAgent_modified_pre.py
Nyrus-Y Jan 19, 2022
2b76071
Delete DQNAgent_baseline.py
Nyrus-Y Jan 19, 2022
a36735d
Delete main_pretrained.py
Nyrus-Y Jan 19, 2022
293032d
Delete main.py
Nyrus-Y Jan 19, 2022
2e2e6dd
Delete main_radio.py
Nyrus-Y Jan 19, 2022
b374396
Delete result.csv
Nyrus-Y Jan 19, 2022
ea2df53
Delete reward.csv
Nyrus-Y Jan 19, 2022
0e13e01
Delete test.py
Nyrus-Y Jan 19, 2022
f87b8de
Changes
Nyrus-Y Jan 19, 2022
45a51f2
Add files via upload
Nyrus-Y Jan 19, 2022
becd675
Add files via upload
Nyrus-Y Jan 19, 2022
ca94b00
ddqn
1itttlesheep Jan 20, 2022
a759e2c
merge szj
1itttlesheep Jan 20, 2022
c4ef497
ddqn
1itttlesheep Jan 21, 2022
9ddcff0
Changes
Nyrus-Y Mar 9, 2022
6862f66
Changes
Nyrus-Y Mar 9, 2022
b6319a9
Changes
Nyrus-Y Mar 10, 2022
e27a927
Changes
Nyrus-Y Mar 10, 2022
4704c89
Update README.md
1itttlesheep Mar 10, 2022
ff153ee
Update README.md
1itttlesheep Mar 10, 2022
ff0836d
Update README.md
1itttlesheep Mar 10, 2022
0e1ec53
Update README.md
1itttlesheep Mar 10, 2022
903a46b
Update README.md
1itttlesheep Mar 10, 2022
58b6c78
Update README.md
1itttlesheep Mar 10, 2022
fe6d7a6
Update README.md
1itttlesheep Mar 10, 2022
c44f11c
Update README.md
1itttlesheep Mar 10, 2022
6e26407
Update README.md
1itttlesheep Mar 10, 2022
184fbee
Update README.md
1itttlesheep Mar 10, 2022
4738537
Update README.md
1itttlesheep Mar 10, 2022
afc7e83
Update README.md
1itttlesheep Mar 10, 2022
13856df
Update README.md
1itttlesheep Mar 10, 2022
80abd86
merge origin
1itttlesheep Mar 12, 2022
ace0904
Changes
Nyrus-Y Mar 12, 2022
9a191c9
Merge branch 'tong' of https://github.com/oxFFFF-Q/Project_AI into tong
1itttlesheep Mar 12, 2022
6167d9f
readme
1itttlesheep Mar 12, 2022
db936bf
Changes
Nyrus-Y Mar 13, 2022
5239091
Changes
Nyrus-Y Mar 13, 2022
3aec434
Merge branch 'tong' of https://github.com/oxFFFF-Q/Project_AI into tong
1itttlesheep Mar 13, 2022
cc793f5
Update README.md
1itttlesheep Mar 13, 2022
e57da24
Update README.md
1itttlesheep Mar 13, 2022
8dd6965
Update README.md
1itttlesheep Mar 13, 2022
673cc9b
Update README.md
1itttlesheep Mar 13, 2022
3bf5c3c
readme
1itttlesheep Mar 14, 2022
a6c0d85
Delete main_save_model.py
Nyrus-Y Mar 14, 2022
ad4fd39
Add files via upload
Nyrus-Y Mar 14, 2022
9 changes: 8 additions & 1 deletion .idea/Project_AI.iml

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion .idea/misc.xml

Some generated files are not rendered by default.

150 changes: 150 additions & 0 deletions DQN_mulit_tensorflow_2/DQNAgent_ddqn.py
@@ -0,0 +1,150 @@
import tensorflow.keras as keras
from keras.layers import Dense, Flatten, Conv2D
from keras import Sequential
from tensorflow.keras.optimizers import Adam
from pommerman.agents import BaseAgent
from pommerman.agents.simple_agent import SimpleAgent
from pommerman import characters

from gym.spaces import Discrete

import constants
from replay_memory import replay_Memory
import numpy as np
import tensorflow as tf


class DQNAgent(BaseAgent):
    """Dueling double DQN agent (Keras)."""

    def __init__(self, character=characters.Bomber):
        super(DQNAgent, self).__init__(character)
        self.baseAgent = SimpleAgent()

        self.training_model = self.new_model()
        self.trained_model = self.new_model()

        self.trained_model.set_weights(self.training_model.get_weights())
        # self.load_weights()

        self.epsilon = constants.epsilon
        self.min_epsilon = constants.MIN_EPSILON
        self.eps_decay = constants.EPSILON_DECAY
        self.buffer = replay_Memory(constants.MAX_BUFFER_SIZE)
        self.update_counter = 0
        # dueling heads; note they are applied outside the compiled model,
        # so model.fit() does not update their weights
        self.V = keras.layers.Dense(1, activation=None)  # state-value stream
        self.A = keras.layers.Dense(6, activation=None)  # advantage stream

    def new_model(self):
        model = Sequential()
        input_shape = (constants.MINIBATCH_SIZE, 18, 11, 11,)
        model.add(Conv2D(256, 3, (1, 1), input_shape=input_shape[1:], activation="relu",
                         data_format="channels_first", padding="same"))
        model.add(Conv2D(256, 3, (1, 1), activation="relu", data_format="channels_first", padding="same"))
        model.add(Conv2D(256, 3, (1, 1), activation="relu", data_format="channels_first", padding="same"))

        model.add(Flatten())
        model.add(Dense(128, activation="relu"))
        model.add(Dense(64, activation='linear'))
        model.compile(loss="mse", optimizer=Adam(learning_rate=0.0001), metrics=['accuracy'])
        model.summary()
        return model

    def dueling(self, state):
        # state value
        V = self.V(state)
        # advantage values
        A = self.A(state)
        mean = tf.math.reduce_mean(A, axis=1, keepdims=True)
        # dueling aggregation: Q = V + (A - mean(A))
        output = V + (A - mean)
        return output

    def advantage(self, state):
        A = self.A(state)
        return A

    def act(self, obs, action_space):
        return self.baseAgent.act(obs, Discrete(6))

    def train(self):
        if self.buffer.size() < constants.MIN_REPLAY_MEMORY_SIZE:
            return

        current_states, action, reward, new_states, done = self.buffer.sample_element(constants.MINIBATCH_SIZE)

        # Q values of the sampled current states, from the online network
        current_states_q = np.array(self.dueling(self.training_model.predict(current_states)))
        double_new_q = np.array(self.dueling(self.training_model.predict(new_states)))
        # Q values of the sampled next states, from the target network
        new_states_q = np.array(self.dueling(self.trained_model.predict(new_states)))

        # X is the state, Y the Q-value targets
        states = []
        actions = []

        for index in range(constants.MINIBATCH_SIZE):
            if not done[index]:
                # double-DQN target: the online network selects the action,
                # the target network evaluates it
                target_q = reward[index] + constants.DISCOUNT * new_states_q[index][np.argmax(double_new_q[index])]
            else:
                target_q = reward[index]
            # update the Q value of the taken action for the given state
            current_better_q = current_states_q[index]
            current_better_q[action[index]] = target_q

            # collect the training data
            states.append(current_states[index])
            actions.append(current_better_q)

        # start training
        # a dedicated tf.data pipeline also works, but is slower:
        # states = tf.reshape(states, (-1, 12, 8, 8))
        # train_dataset = tf.data.Dataset.from_tensor_slices((states, actions))
        # self.training_model.fit(train_dataset, verbose=0, shuffle=False)

        self.training_model.fit(np.array(states), np.array(actions), batch_size=constants.MINIBATCH_SIZE, verbose=0,
                                shuffle=False)

        # advance the target-network update counter when the batch holds a terminal transition
        if np.any(done):
            self.update_counter += 1

        # once the counter reaches its limit, copy the online weights into the target network
        if self.update_counter > constants.UPDATE_EVERY:
            self.trained_model.set_weights(self.training_model.get_weights())
            self.update_counter = 0

    def action_choose(self, state):
        state_reshape = tf.reshape(state, (-1, 18, 11, 11))
        q_table = self.advantage(self.training_model.predict(state_reshape))
        return q_table

    # epsilon decay
    def epsilon_decay(self):
        self.epsilon = self.epsilon * self.eps_decay if self.epsilon > self.min_epsilon else self.epsilon

    def save_weights(self, numOfEpisode):
        # checkpoint the weights every 200 episodes
        if numOfEpisode % 200 == 0:
            self.training_model.save_weights('./checkpoints/FFA{:}/FFA{:}'.format(numOfEpisode, numOfEpisode))
            # self.training_model.save_weights('./checkpoints/FFA-test-1/FFA-test-1')
            print("weights saved!")

    def load_weights(self):
        self.training_model.load_weights('./checkpoints/FFA2200/FFA2200')
        self.trained_model.load_weights('./checkpoints/FFA2200/FFA2200')
        print("weights loaded!")

    def save_model(self):
        self.training_model.save("./second_model")
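
The per-sample loop in train() above can also be written as a single vectorized step. The sketch below is a minimal illustration of the same double-DQN target rule, assuming NumPy arrays for the sampled batch and the same constants.DISCOUNT; the function name and signature are illustrative, not code from this diff.

import numpy as np

def double_dqn_targets(current_q, online_next_q, target_next_q, actions, rewards, dones, discount):
    # the online network selects the greedy next action ...
    best_next = np.argmax(online_next_q, axis=1)
    # ... and the target network evaluates it
    evaluated = target_next_q[np.arange(len(rewards)), best_next]
    # the (1 - done) mask plays the role of the `if done[index]` branch above
    targets = rewards + discount * evaluated * (1.0 - dones.astype(np.float32))
    # overwrite only the Q values of the actions actually taken
    q_out = current_q.copy()
    q_out[np.arange(len(rewards)), actions] = targets
    return q_out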
133 changes: 133 additions & 0 deletions DQN_mulit_tensorflow_2/DQNAgent_modified.py
@@ -0,0 +1,133 @@
from keras.layers import Dense, Flatten, Conv2D
from keras import Sequential
from tensorflow.keras.optimizers import Adam
from pommerman.agents import BaseAgent
from pommerman.agents.simple_agent import SimpleAgent
from pommerman import characters

from gym.spaces import Discrete

from DQN_mulit_tensorflow_2 import constants
from replay_memory import replay_Memory
import numpy as np
import tensorflow as tf


class DQNAgent(BaseAgent):
    """DQN agent, second version (Keras)."""

    def __init__(self, character=characters.Bomber):
        super(DQNAgent, self).__init__(character)
        self.baseAgent = SimpleAgent()

        self.training_model = self.new_model()
        self.trained_model = self.new_model()

        self.trained_model.set_weights(self.training_model.get_weights())
        self.load_weights()

        self.epsilon = constants.epsilon
        self.min_epsilon = constants.MIN_EPSILON
        self.eps_decay = constants.EPSILON_DECAY
        self.buffer = replay_Memory(constants.MAX_BUFFER_SIZE)
        self.update_counter = 0

    def new_model(self):
        model = Sequential()
        input_shape = (constants.MINIBATCH_SIZE, 18, 11, 11,)
        model.add(Conv2D(256, 3, (1, 1), input_shape=input_shape[1:], activation="relu",
                         data_format="channels_first", padding="same"))
        model.add(Conv2D(256, 3, (1, 1), activation="relu", data_format="channels_first", padding="same"))
        model.add(Conv2D(256, 3, (1, 1), activation="relu", data_format="channels_first", padding="same"))

        model.add(Flatten())
        model.add(Dense(128, activation="relu"))
        model.add(Dense(6, activation='linear'))
        model.compile(loss="mse", optimizer=Adam(learning_rate=0.0001), metrics=['accuracy'])
        model.summary()
        return model

    def act(self, obs, action_space):
        return self.baseAgent.act(obs, Discrete(6))

    def train(self):
        if self.buffer.size() < constants.MIN_REPLAY_MEMORY_SIZE:
            return

        current_states, action, reward, new_states, done = self.buffer.sample_element(constants.MINIBATCH_SIZE)

        # Q values of the sampled current states, from the online network
        current_states_q = self.training_model.predict(current_states)
        # online-network Q values of the next states (only needed for a double-DQN target)
        double_new_q = self.training_model.predict(new_states)
        # Q values of the sampled next states, from the target network
        new_states_q = self.trained_model.predict(new_states)

        # X is the state, Y the Q-value targets
        states = []
        actions = []

        for index in range(constants.MINIBATCH_SIZE):
            if not done[index]:
                # standard DQN target; the double-DQN variant would be
                # reward[index] + constants.DISCOUNT * new_states_q[index][np.argmax(double_new_q[index])]
                new_state_q = reward[index] + constants.DISCOUNT * np.max(new_states_q[index])
            else:
                new_state_q = reward[index]
            # update the Q value of the taken action for the given state
            current_better_q = current_states_q[index]
            current_better_q[action[index]] = new_state_q

            # collect the training data
            states.append(current_states[index])
            actions.append(current_better_q)

        # start training
        # a dedicated tf.data pipeline also works, but is slower:
        # states = tf.reshape(states, (-1, 12, 8, 8))
        # train_dataset = tf.data.Dataset.from_tensor_slices((states, actions))
        # self.training_model.fit(train_dataset, verbose=0, shuffle=False)

        self.training_model.fit(np.array(states), np.array(actions), batch_size=constants.MINIBATCH_SIZE, verbose=0,
                                shuffle=False)

        # advance the target-network update counter when the batch holds a terminal transition
        if np.any(done):
            self.update_counter += 1

        # once the counter reaches its limit, copy the online weights into the target network
        if self.update_counter > constants.UPDATE_EVERY:
            self.trained_model.set_weights(self.training_model.get_weights())
            self.update_counter = 0

    def action_choose(self, state):
        state_reshape = tf.reshape(state, (-1, 18, 11, 11))
        q_table = self.training_model.predict(state_reshape)
        return q_table

    # epsilon decay
    def epsilon_decay(self):
        self.epsilon = self.epsilon * self.eps_decay if self.epsilon > self.min_epsilon else self.epsilon

    def save_weights(self, numOfEpisode):
        # checkpoint the weights every 200 episodes
        if numOfEpisode % 200 == 0:
            self.training_model.save_weights('./checkpoints/FFA{:}/FFA{:}'.format(numOfEpisode, numOfEpisode))
            # self.training_model.save_weights('./checkpoints/FFA-test-1/FFA-test-1')
            print("weights saved!")

    def load_weights(self):
        self.training_model.load_weights('./checkpoints/FFA2200/FFA2200')
        self.trained_model.load_weights('./checkpoints/FFA2200/FFA2200')
        print("weights loaded!")

    def save_model(self):
        self.training_model.save("./second_model")
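
Both agents depend on replay_Memory from replay_memory.py, which is not part of this diff. A minimal sketch compatible with the calls made above — replay_Memory(constants.MAX_BUFFER_SIZE), .size(), and .sample_element(batch_size) returning a (states, actions, rewards, next_states, dones) tuple — might look like this; the append method name and the exact layout are assumptions, and the repository's actual implementation may differ.

import random
from collections import deque
import numpy as np

class replay_Memory:
    def __init__(self, max_size):
        # fixed-capacity ring buffer; the oldest transitions are evicted automatically
        self.buffer = deque(maxlen=max_size)

    def size(self):
        return len(self.buffer)

    def append(self, transition):
        # transition: (state, action, reward, next_state, done); method name is an assumption
        self.buffer.append(transition)

    def sample_element(self, batch_size):
        # uniform random minibatch, unpacked into one array per field
        batch = random.sample(self.buffer, batch_size)
        states, actions, rewards, next_states, dones = map(np.array, zip(*batch))
        return states, actions, rewards, next_states, dones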