
Commit 0277bb3

Update keras-RL examples to use greedy policy in testing

1 parent f29ae72

4 files changed: +8 -5 lines

OpenAI Gym/openAI_massSpringContinuous_test.py (+2 -1)

@@ -105,7 +105,8 @@
 # even the metrics!
 memory = SequentialMemory(limit=2*NUM_STEPS, window_length=1)
 # random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
-random_process = OrnsteinUhlenbeckProcess(size=nb_actions, dt = env.tau, theta=0.6, mu=0.0, sigma=0.5, sigma_min=0.15, n_steps_annealing=NUM_STEPS)
+# random_process = OrnsteinUhlenbeckProcess(size=nb_actions, dt = env.tau, theta=0.6, mu=0.0, sigma=0.5, sigma_min=0.15, n_steps_annealing=NUM_STEPS)
+random_process = None # We should always do the best action in testing
 
 agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                   memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,

OpenAI Gym/openAI_planarCraneContinuous_test.py (+2 -1)

@@ -105,7 +105,8 @@
 # even the metrics!
 memory = SequentialMemory(limit=2*NUM_STEPS, window_length=1)
 # random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
-random_process = OrnsteinUhlenbeckProcess(size=nb_actions, dt = env.tau, theta=0.6, mu=0.0, sigma=0.5, sigma_min=0.15, n_steps_annealing=NUM_STEPS)
+# random_process = OrnsteinUhlenbeckProcess(size=nb_actions, dt = env.tau, theta=0.6, mu=0.0, sigma=0.5, sigma_min=0.15, n_steps_annealing=NUM_STEPS)
+random_process = None # We should always do the best action in testing
 
 agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                   memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
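
Both continuous-control test scripts make the same change: the annealed Ornstein-Uhlenbeck exploration noise used during training is dropped, so the DDPG actor's deterministic output is used directly. As a minimal sketch of the pattern (assuming the keras-rl API these scripts use, with env, actor, critic, action_input, nb_actions, and NUM_STEPS defined earlier in each script):

from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess

memory = SequentialMemory(limit=2*NUM_STEPS, window_length=1)

# Training scripts: annealed OU noise on the actor output drives exploration
# (train_noise is a hypothetical name; the scripts call it random_process).
train_noise = OrnsteinUhlenbeckProcess(size=nb_actions, dt=env.tau, theta=0.6,
                                       mu=0.0, sigma=0.5, sigma_min=0.15,
                                       n_steps_annealing=NUM_STEPS)

# Test scripts: no random process, so every action is the actor's best estimate.
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=None)

If the installed keras-rl version samples the random process only while the agent is training, the change is defensive rather than behavioral, but it makes the test scripts' intent explicit.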

OpenAI Gym/openAI_planarCraneFeedback_test.py (+2 -1)

@@ -92,8 +92,9 @@
 # even the metrics!
 memory = SequentialMemory(limit=NUM_STEPS, window_length=1)
 # train_policy = BoltzmannQPolicy(tau=0.05)
-test_policy = GreedyQPolicy()
 train_policy = EpsGreedyQPolicy()
+test_policy = GreedyQPolicy()
+
 
 if DUEL_DQN:
     dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,

OpenAI Gym/openAI_planarCraneFeedback_train.py (+2 -2)

@@ -81,8 +81,8 @@
 memory = SequentialMemory(limit=NUM_STEPS, window_length=1)
 # train_policy = BoltzmannQPolicy(tau=0.05)
 train_policy = EpsGreedyQPolicy()
-test_policy = EpsGreedyQPolicy()
-# test_policy = GreedyQPolicy()
+#test_policy = EpsGreedyQPolicy()
+test_policy = GreedyQPolicy()
 
 if DUEL_DQN:
     dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
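
The DQN scripts get the analogous fix through keras-rl's policy hooks: DQNAgent accepts separate policy and test_policy arguments, so fit() can keep exploring with epsilon-greedy while test() always takes the argmax-Q action. A minimal sketch, assuming the model, nb_actions, NUM_STEPS, and env defined earlier in these scripts:

from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import EpsGreedyQPolicy, GreedyQPolicy

memory = SequentialMemory(limit=NUM_STEPS, window_length=1)
train_policy = EpsGreedyQPolicy()  # random action with probability eps, else argmax-Q
test_policy = GreedyQPolicy()      # always argmax-Q

dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
               nb_steps_warmup=100, policy=train_policy, test_policy=test_policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

dqn.fit(env, nb_steps=NUM_STEPS, visualize=False)  # explores via train_policy
dqn.test(env, nb_episodes=5, visualize=True)       # acts greedily via test_policy

The train script previously evaluated with test_policy = EpsGreedyQPolicy(), so even evaluation runs kept taking random actions; switching to GreedyQPolicy() makes test-time behavior deterministic given the learned Q-network.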
