-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfigure.conf
More file actions
87 lines (81 loc) · 1.57 KB
/
configure.conf
File metadata and controls
87 lines (81 loc) · 1.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# Environment selection and sizing.
[ENV]
# name of the environment to use
env_name = Catcher
# NOTE(review): presumably the number of environment instances - confirm against the loader
env_num = 50
# NOTE(review): likely a buffer capacity (number of stored items); not shown in this file - confirm
capacity = 10000
# Experiment-monitoring backends.
[MONITOR]
# NOTE(review): presumably on/off switches where 0 means disabled - confirm.
# These use 0/1 while other sections in this file use True/False.
wandb = 0
tensorboard = 0
# Network architecture.
[NETWORK]
# NOTE(review): layers is 3 while hidden_dims lists two values; how the two
# relate is not shown in this file - confirm against the model code.
layers = 3
# comma-separated list; presumably one width per hidden layer
hidden_dims = 128,128
# Settings for the DQN agent.
[DQN]
# maximum number of training iterations
max_train_steps = 500
# maximum number of steps in one game
# NOTE(review): the key is spelled "eposide"; left unchanged because the
# consuming code presumably looks it up by this exact name.
max_eposide_step = 500
# evaluation frequency (units are not shown in this file)
evaluate_freq = 10
# learning frequency within one game
learn_freq = 10
# number of evaluation runs
evaluate_times = 3
# learning rate
dqn_lr = 0.001
# discount factor
gamma = 0.9
# NOTE(review): presumably epsilon-greedy exploration settings - starting
# value, lower bound, and decay amount; confirm how the decay is applied.
epsilon = 0.4
epsilon_min = 0.001
epsilon_decay = 0.0001
# batch size
batch_size = 10000
# NOTE(review): presumably the size of each sampled mini-batch - confirm
mini_batch_size = 256
# whether to use the learning-rate decay trick
use_lr_decay = True
# how often the target network is updated
update_target = 200
# Settings for the PPO agent. The notes below are inferred from key names and
# should be confirmed against the code that reads this section.
[PPO]
# [DQN] documents this same key as the maximum number of training iterations
max_train_steps = 500
# NOTE(review): looks like the number of steps collected per batch - confirm
per_batch_steps = 1000
# presumably evaluation and checkpoint-saving frequencies; units not shown here
evaluate_freq = 20
save_freq = 20
batch_size = 10000
mini_batch_size = 512
# presumably separate learning rates for the actor (lr_a) and critic (lr_c)
lr_a = 0.005
lr_c = 0.0002
# presumably the discount factor, as documented for gamma in [DQN]
gamma = 0.98
# NOTE(review): "lamda" is likely the GAE lambda (see use_gae); the spelling
# is left unchanged because the key is presumably looked up by this name.
lamda = 0.95
# NOTE(review): likely the PPO clipping range (see use_ppo_clip) rather than
# an exploration rate - confirm
epsilon = 0.2
# Boolean feature toggles (True/False) and their associated parameters.
use_gae = True
# presumably the threshold used when use_grad_clip is True - confirm
grad_clip_param = 0.5
use_adv_norm = True
use_state_norm = False
use_reward_norm = False
use_reward_scaling = False
# presumably the weight of the entropy term in the loss - confirm
entropy_coef = 0.05
use_lr_decay = True
use_grad_clip = True
use_orthogonal_init = True
use_ppo_clip = True
# Settings for the DDPG agent.
[DDPG]
# maximum number of training iterations
max_train_steps = 500
# maximum number of steps in one game
# NOTE(review): the key is spelled "eposide"; left unchanged because the
# consuming code presumably looks it up by this exact name.
max_eposide_step = 500
# evaluation frequency (units are not shown in this file)
evaluate_freq = 10
# learning frequency within one game
learn_freq = 10
# number of evaluation runs
evaluate_times = 3
# learning rate
# NOTE(review): this key is named dqn_lr inside [DDPG], which looks copied
# from [DQN]; confirm which key name the DDPG code actually reads.
dqn_lr = 0.001
# discount factor
gamma = 0.9
# NOTE(review): presumably the soft-update coefficient for the target
# networks; how it interacts with update_target is not shown here - confirm
tau = 0.5
# batch size
batch_size = 10000
# NOTE(review): presumably the size of each sampled mini-batch - confirm
mini_batch_size = 256
# whether to use the learning-rate decay trick
use_lr_decay = True
# how often the target network is updated
update_target = 200