-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain.py
65 lines (42 loc) · 1.82 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import argparse
from nes import NESOptimizer
import gym
import matplotlib.pyplot as plt
import json
def _str2bool(value):
    """Convert a command-line token into a real boolean.

    ``type=bool`` is a trap with argparse: ``bool("False")`` is ``True``
    because every non-empty string is truthy, so the flag could never be
    switched off with a natural-looking value. This converter maps the usual
    spellings explicitly. The empty string stays ``False`` (as ``bool("")``
    was), so the only values whose meaning changes are the ones that were
    being misread before.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean
            spelling; argparse turns this into a normal usage error.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ('true', 't', 'yes', 'y', 'on', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', 'off', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value (e.g. True/False), got %r" % (value,))


# Command-line interface of the training script. Every option has a default,
# so the script can be started without any arguments.
parser = argparse.ArgumentParser()
parser.add_argument('-p', '--n-episodes', default=10, type=int,
                    help="Number of episodes in batch")
parser.add_argument('-e', '--env-id', type=str, default="CartPole-v1",
                    help="Environment id")
parser.add_argument('-a', '--alpha', type=float, default=0.1, help="Learning rate")
parser.add_argument('-s', '--sigma', type=float, default=0.1, help="Noise scale")
parser.add_argument('-b', '--n-batches', type=int, default=100,
                    help="Number of batches")
# The two boolean options take an explicit value (e.g. ``-v False``); they are
# parsed with _str2bool rather than bool so that "False"/"0"/"no" disable them.
parser.add_argument('-v', '--verbose', type=_str2bool, default=True,
                    help="Show or not intermediate results")
parser.add_argument('-r', '--render', type=_str2bool, default=False,
                    help="Render environment")
# An empty api key / file name (the defaults) means "feature not requested".
parser.add_argument('-k', '--api-key', type=str, default='',
                    help="API key to upload solution")
parser.add_argument('-f', '--file', type=str, default='',
                    help="File to save results")
def train(args):
    """Run one NES optimisation on the environment selected by ``args``.

    Args:
        args: parsed command-line namespace; the attributes read here are
            ``env_id``, ``api_key``, ``alpha``, ``sigma``, ``n_batches``,
            ``n_episodes``, ``verbose`` and ``render``.

    Returns:
        A ``(history, w)`` tuple. Note the order is swapped relative to what
        ``NESOptimizer.optimize`` returns (``w, history``).
    """
    # A non-empty api key switches on the Monitor wrapper and the upload step.
    upload_requested = bool(args.api_key)

    env = gym.make(args.env_id)
    try:
        if upload_requested:
            # Monitor output goes to ./tmp, which is the directory uploaded
            # below; force=True is passed through unchanged.
            env = gym.wrappers.Monitor(env, './tmp', force=True)
        nes = NESOptimizer(env, args.alpha, args.sigma)
        w, history = nes.optimize(env, args.n_batches, args.n_episodes,
                                  args.verbose, args.render)
    finally:
        # Always release the environment, even when optimisation fails;
        # on success this still happens before the upload, as before.
        env.close()

    if upload_requested:
        gym.upload('./tmp', api_key=args.api_key)
    return history, w
if __name__ == "__main__":
    # Script entry point: parse the CLI, train, then either save or plot.
    args = parser.parse_args()
    reward_history, w = train(args)
    if args.file:
        # A file name was given: dump the reward history and the weights
        # (converted via .tolist()) as JSON. The payload is built before the
        # file is opened so a conversion failure leaves no file behind.
        payload = {'history': reward_history, 'weights': w.tolist()}
        with open(args.file, 'w') as out_file:
            json.dump(payload, out_file)
    else:
        # No output file requested: show the reward curve instead.
        plt.plot(reward_history)
        plt.show()