-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathoptimize.py
More file actions
41 lines (31 loc) · 1023 Bytes
/
optimize.py
File metadata and controls
41 lines (31 loc) · 1023 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
"""
Optimizes the actor-critic hyperparameters using Gaussian-process optimization from scikit-optimize (skopt).
Idea from Ashioto's submission to OpenAI gym
"""
import numpy as np
import skopt
import pickle
import ac
import gym
from State_LFA import state_lfa
# Environment shared by every trial; created once at import time and reused by main().
env = gym.envs.make('MountainCarContinuous-v0')
# Feature object built from the environment and handed to each agent in main()
# (presumably a linear function approximation of the state -- confirm in State_LFA).
phi = state_lfa(env)
def main(params):
    """Objective function for the optimizer: run one agent and score it.

    Args:
        params: Sequence of five hyperparameter values, in the order
            ``(policy_lr, value_lr, lamb, exp_buff_len, gamma)``.

    Returns:
        The negated value returned by ``agent.run()``.
    """
    policy_lr, value_lr, lamb, exp_buff_len, gamma = params
    agent = ac.ActorCritic(
        env,
        phi,
        # Use the sampled buffer length. It was previously hard-coded to 100,
        # which made this search dimension a no-op. Cast to a plain int
        # because the optimizer may hand back a NumPy integer.
        exp_buffer_length=int(exp_buff_len),
        episodes=1000,
        gamma=gamma,
        display=False,
        lamb=lamb,
        policy_lr=policy_lr,
        value_lr=value_lr,
    )
    loss = agent.run()
    print("Loss = %f @ params = %s" % (loss, str(params)))
    # NOTE(review): the caller minimizes this return value, so negating means
    # the search maximizes whatever run() returns. That is only correct if
    # run() returns a reward-like score despite the name "loss" -- confirm.
    return -loss
if __name__ == "__main__":
    # Search space, listed in the same order that main() unpacks its argument.
    # NOTE(review): each 10-element array is presumably treated by skopt as a
    # categorical set of candidate values, and the 2-tuple of floats as a
    # continuous range -- confirm against the installed skopt version.
    params = [
        np.logspace(-4, -1, 10),              # policy_lr
        np.logspace(-4, -1, 10),              # value_lr
        np.logspace(-5, -1, 10),              # lamb
        # The trailing comma here was missing, which made Python try to call
        # the array with (0.90, 0.99) and raise TypeError.
        np.linspace(10, 100, 10, dtype=int),  # exp_buff_len
        (0.90, 0.99),                         # gamma
    ]
    res = skopt.gp_minimize(func=main, dimensions=params, n_calls=100, verbose=True)
    print(res.x, res.fun)
    # Context manager guarantees the file is flushed and closed even if
    # pickling fails part-way through.
    with open('res.pkl', 'wb') as out_file:
        pickle.dump(res, out_file)