
Commit 3860095

Continued refinement to reward functions and the continuous-input RL algorithm.
1 parent 6d963df commit 3860095

11 files changed (+124 −95 lines)
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1440f3a5fb63268d50a5c8fe7d5716a48707cb2c70bf4730a7de97dc037d2f34
+size 12658816

OpenAI Gym/keras-rl_FileLogger_processing.py

+1 −1
@@ -28,7 +28,7 @@
 import json     # the data files generated are json

 # TODO: 07/13/17 - JEV - Add GUI, argparse, or CLI for selecting file
-FILENAME = 'logs/duel_dqn_planar_crane-v0_log_128_4_100000_2017-07-13_215831.json'
+FILENAME = 'logs/ddpg_planar_crane_continuous-v0_log_2048_3_100000_2017-07-14_175832.json'
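For reference, a minimal sketch of loading and plotting a log like the one named above, assuming the keras-rl FileLogger writes a single JSON object mapping metric names to per-episode lists; the key names used here are assumptions, not taken from this repository:

import json
import matplotlib.pyplot as plt

with open(FILENAME, 'r') as log_file:
    data = json.load(log_file)

# 'episode' and 'episode_reward' are assumed key names
plt.plot(data['episode'], data['episode_reward'])
plt.xlabel('Episode')
plt.ylabel('Episode Reward')
plt.savefig('episode_reward.pdf')   # output to file; plotting is not set up for on-screen viewing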

OpenAI Gym/openAI_montior_processing.py

+1 −1
@@ -28,7 +28,7 @@
 import json     # the data files generated are json

 # TODO: 07/12/17 - JEV - Add GUI, argparse, or CLI for selecting file
-FILENAME = "example_data/duel_dqn_planar_crane-v0_monitor_2048_4_2500000_2017-07-12_194502/openaigym.episode_batch.0.41178.stats.json"
+FILENAME = "example_data/duel_dqn_planar_crane-v0_monitor_1024_4_100000_2017-07-13_222427/openaigym.episode_batch.0.5356.stats.json"

OpenAI Gym/openAI_planarCraneContinuous_episodeDataProcessing.py

+1 −1
@@ -25,7 +25,7 @@
 import numpy as np
 import matplotlib.pyplot as plt

-FILENAME = 'example_data/EpisodeData_2017-07-13_195043.csv'
+FILENAME = 'example_data/EpisodeData_2017-07-14_190523.csv'
 CABLE_LENGTH = 2.0

 # Files have data saved as:
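The column layout that follows "# Files have data saved as:" falls outside this hunk, so the sketch below only illustrates loading such an episode CSV; the column indices are placeholders, not the environment's actual layout:

import numpy as np
import matplotlib.pyplot as plt

episode_data = np.genfromtxt(FILENAME, delimiter=',', skip_header=1)

time = episode_data[:, 0]     # placeholder: assumes the first column is time
state = episode_data[:, 1]    # placeholder: assumes a state variable in the second column

plt.plot(time, state)
plt.xlabel('Time (s)')
plt.savefig('episode_response.pdf')   # output to file, per the NOTE about plotting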

OpenAI Gym/openAI_planarCraneContinuous_test.py

+10 −4
@@ -44,13 +44,13 @@

 LAYER_SIZE = 2048
 NUM_HIDDEN_LAYERS = 3
-NUM_STEPS = 50000
+NUM_STEPS = 1000000
 TRIAL_ID = datetime.datetime.now().strftime('%Y-%m-%d_%H%M%S')

 # TODO: Add file picker GUI - For now, look for files with the format below
 # Remove the _actor or _critic from the filename. The load method automatically
 # appends these.
-FILENAME = 'weights/ddpg_planar_crane_continuous-v0_weights_2048_3_50000_2017-07-13_200743.h5f'
+FILENAME = 'weights/ddpg_planar_crane_continuous-v0_weights_2048_3_100000_2017-07-14_175832.h5f'

 # Get the environment and extract the number of actions.
 env = gym.make(ENV_NAME)

@@ -60,7 +60,12 @@
 env.SAVE_DATA = True
 env.MAX_STEPS = 500

-
+MONITOR_FILENAME = 'example_data/ddpg_{}_monitor_{}_{}_{}_{}'.format(ENV_NAME,
+                                                                     LAYER_SIZE,
+                                                                     NUM_HIDDEN_LAYERS,
+                                                                     NUM_STEPS,
+                                                                     TRIAL_ID)
+env = gym.wrappers.Monitor(env, MONITOR_FILENAME, force=True)

 # Next, we build a very simple actor model.
 actor = Sequential()

@@ -99,7 +104,8 @@
 # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
 # even the metrics!
 memory = SequentialMemory(limit=2*NUM_STEPS, window_length=1)
-random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
+# random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
+random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=0.6, mu=0., sigma=.1)
 agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                   memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                   random_process=random_process, gamma=.99, target_model_update=1e-3)
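The functional change in the last hunk is the exploration noise: theta rises from 0.15 to 0.6 (stronger pull back toward the mean) and sigma drops from 0.3 to 0.1 (smaller random kicks). A standalone sketch of the usual Ornstein-Uhlenbeck discretization, independent of keras-rl's internal implementation, shows what those two parameters do:

import numpy as np

def ou_noise(theta, mu, sigma, dt=1.0, n_steps=500, size=1):
    """Sample an OU process: x[t+1] = x[t] + theta*(mu - x[t])*dt + sigma*sqrt(dt)*N(0, 1)."""
    x = np.zeros((n_steps, size))
    for step in range(1, n_steps):
        x[step] = (x[step - 1]
                   + theta * (mu - x[step - 1]) * dt
                   + sigma * np.sqrt(dt) * np.random.normal(size=size))
    return x

old_noise = ou_noise(theta=0.15, mu=0.0, sigma=0.3)   # wanders farther from the mean
new_noise = ou_noise(theta=0.6, mu=0.0, sigma=0.1)    # stays tighter around the mean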

OpenAI Gym/openAI_planarCraneContinuous_train.py

+9 −9
@@ -6,7 +6,7 @@
 # File to train on the CRAWLAB custom OpenAI planar crane environment
 #
 # Requires:
-# * CRAWLAB planar_crane Open_AI environment folder to be in the same as this file
+# * CRAWLAB planar_crane_continuous Open_AI environment folder to be in the same as this file
 # * keras, openAI gym, keras-rl packages (all are pip or conda installable)
 #
 # NOTE: Any plotting is set up for output, not viewing on screen.

@@ -45,7 +45,7 @@

 LAYER_SIZE = 2048
 NUM_HIDDEN_LAYERS = 3
-NUM_STEPS = 50000
+NUM_STEPS = 100000
 TRIAL_ID = datetime.datetime.now().strftime('%Y-%m-%d_%H%M%S')

 # Get the environment and extract the number of actions.

@@ -87,7 +87,7 @@
 print(actor.summary())


-# critic model
+# critic model - TODO: 07/14/17 - update this to sequential model style
 action_input = Input(shape=(nb_actions,), name='action_input')
 observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
 flattened_observation = Flatten()(observation_input)

@@ -113,22 +113,22 @@
                   memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                   random_process=random_process, gamma=.99, target_model_update=1e-3)

-agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
+agent.compile(Adam(lr=.001), metrics=['mae'])




 # Optionally, we can reload a previous model's weights and continue training from there
 # Remove the _actor or _critic from the filename. The load method automatically
-# appends these.
-# WEIGHTS_FILENAME = 'weights/ddpg_planar_crane_continuous-v0_weights_32_4_50000_2017-07-13_134945.h5f'
-# agent.load_weights(WEIGHTS_FILENAME)
+# appends these.
+WEIGHTS_FILENAME = 'weights/ddpg_planar_crane_continuous-v0_weights_2048_3_100000_2017-07-14_171736.h5f'
+agent.load_weights(WEIGHTS_FILENAME)


 callbacks = []
-# checkpoint_weights_filename = 'weights/ddpg_{}_checkpointWeights_{{step}}_{}_{}_{}_{}.h5f'.format(ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
+checkpoint_weights_filename = 'weights/ddpg_{}_checkpointWeights_{{step}}_{}_{}_{}_{}.h5f'.format(ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
 log_filename = 'logs/ddpg_{}_log_{}_{}_{}_{}.json'.format(ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
-# callbacks += [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=10000)]
+#callbacks += [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=100000)]
 callbacks += [FileLogger(log_filename, interval=100)]

 # Okay, now it's time to learn something! We visualize the training here for show, but this
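The critic TODO above asks for a Sequential-style rewrite, but a DDPG critic takes two inputs (the action and the observation), so it is normally built with the Keras functional API rather than Sequential. A minimal sketch consistent with the three input lines shown in the hunk follows; the merge point and layer sizes are assumptions, not this file's actual architecture, and nb_actions, env, and LAYER_SIZE are assumed to be defined earlier in the script:

from keras.layers import Input, Dense, Flatten, concatenate
from keras.models import Model

action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
flattened_observation = Flatten()(observation_input)

# Join the action and the flattened observation, then map to a single Q-value
merged = concatenate([action_input, flattened_observation])
hidden = Dense(LAYER_SIZE, activation='relu')(merged)
hidden = Dense(LAYER_SIZE, activation='relu')(hidden)
q_value = Dense(1, activation='linear')(hidden)

critic = Model(inputs=[action_input, observation_input], outputs=q_value)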

OpenAI Gym/openAI_planarCrane_episodeDataProcessing.py

+1 −1
@@ -25,7 +25,7 @@
 import numpy as np
 import matplotlib.pyplot as plt

-FILENAME = 'example_data/EpisodeData_2017-07-14_001641.csv'
+FILENAME = 'example_data/EpisodeData_2017-07-14_001608.csv'
 CABLE_LENGTH = 2.0

 # Files have data saved as:

OpenAI Gym/openAI_planarCrane_test.py

+40 −34
@@ -35,44 +35,35 @@
 from keras.layers import Dense, Activation, Flatten
 from keras.optimizers import Adam

+from rl.agents.cem import CEMAgent
 from rl.agents.dqn import DQNAgent
+from rl.agents.sarsa import SarsaAgent
 from rl.policy import BoltzmannQPolicy, GreedyQPolicy, EpsGreedyQPolicy
-from rl.memory import SequentialMemory
+from rl.memory import SequentialMemory, EpisodeParameterMemory


 ENV_NAME = 'planar_crane-v0'

 LAYER_SIZE = 1024
 NUM_HIDDEN_LAYERS = 4
 NUM_STEPS = 100000
-DUEL_DQN = True
+METHOD = 'CEM'    # can be DQN, DUEL_DQN, SARSA, or CEM
 TRIAL_ID = datetime.datetime.now().strftime('%Y-%m-%d_%H%M%S')

-# TODO: Add file picker GUI - For now, look for files with the format below
-# FILENAME = 'weights/dqn_{}_weights_{}_{}_{}.h5f'.format(ENV_NAME, LAYER_SIZE, NUM_STEPS, TRIAL_ID)
-# FILENAME = 'weights/dqn_{}_weights_{}_{}.h5f'.format(ENV_NAME, LAYER_SIZE, NUM_STEPS)
-FILENAME = 'weights/duel_dqn_planar_crane-v0_weights_1024_4_100000_2017-07-13_222427.h5f'
+# TODO: 07/14/17 - JEV - Add GUI, argparser, or CLI for this selection
+WEIGHT_FILENAME = 'weights/SARSA_planar_crane-v0_weights_1024_4_100000_2017-07-14_160523.h5f'
+
+# Define the filenames to use for this session
+MONITOR_FILENAME = 'example_data/{}_{}_monitor_{}_{}_{}_{}'.format(METHOD, ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)

 # Get the environment and extract the number of actions.
 env = gym.make(ENV_NAME)

 # Record episode data?
-env.SAVE_DATA = True
-
-# uncomment to record data about the training session, including video if visualize is true
-if DUEL_DQN:
-    MONITOR_FILENAME = 'example_data/duel_dqn_{}_monitor_{}_{}_{}_{}'.format(ENV_NAME,
-                                                                             LAYER_SIZE,
-                                                                             NUM_HIDDEN_LAYERS,
-                                                                             NUM_STEPS,
-                                                                             TRIAL_ID)
-else:
-    MONITOR_FILENAME = 'example_data/dqn_{}_monitor_{}_{}_{}_{}'.format(ENV_NAME,
-                                                                        LAYER_SIZE,
-                                                                        NUM_HIDDEN_LAYERS,
-                                                                        NUM_STEPS,
-                                                                        TRIAL_ID)
-# env = gym.wrappers.Monitor(env, MONITOR_FILENAME, force=True)
+env.SAVE_DATA = False
+
+# uncomment to record data about the training session, including video if video_callable is true
+env = gym.wrappers.Monitor(env, MONITOR_FILENAME, video_callable=False, force=True)


 # np.random.seed(123)

@@ -101,26 +92,41 @@
 # even the metrics!
 memory = SequentialMemory(limit=NUM_STEPS, window_length=1)
 # train_policy = BoltzmannQPolicy(tau=0.05)
-test_policy = EpsGreedyQPolicy()
-train_policy = GreedyQPolicy()
+train_policy = EpsGreedyQPolicy()
+test_policy = GreedyQPolicy()

-if DUEL_DQN:
-    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
+
+# Compile the agent based on method specified. We use .upper() to convert to
+# upper case for comparison
+if METHOD.upper() == 'DUEL_DQN':
+    agent = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
                      enable_dueling_network=True, dueling_type='avg', target_model_update=1e-2,
                      policy=train_policy, test_policy=test_policy)
-
-    filename = 'weights/duel_dqn_{}_weights_{}_{}_{}_{}.h5f'.format(ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
-else:
-    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
+    agent.compile(Adam(lr=1e-3), metrics=['mae'])
+
+elif METHOD.upper() == 'DQN':
+    agent = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
                      target_model_update=1e-2, policy=train_policy, test_policy=test_policy)
+    agent.compile(Adam(lr=1e-3), metrics=['mae'])
+
+elif METHOD.upper() == 'SARSA':
+    # SARSA does not require a memory.
+    policy = BoltzmannQPolicy()
+    agent = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10, policy=train_policy)
+    agent.compile(Adam(lr=1e-3), metrics=['mae'])

-    filename = 'weights/dqn_{}_weights_{}_{}_{}_{}.h5f'.format(ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
+elif METHOD.upper() == 'CEM':
+    agent = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
+                     batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)
+    agent.compile()
+
+else:
+    raise('Please select DQN, DUEL_DQN, SARSA, or CEM for your method type.')


-dqn.compile(Adam(lr=1e-3), metrics=['mae'])

 # Load the model weights
-dqn.load_weights(FILENAME)
+agent.load_weights(WEIGHT_FILENAME)

 # Finally, evaluate our algorithm for 1 episode.
-dqn.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=501)
+agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=500)
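One behavioral fix in this hunk is the policy swap: EpsGreedyQPolicy is now the training policy (explore with probability eps) and GreedyQPolicy is the test policy (always take the argmax action). A small numpy sketch of the two selection rules, independent of keras-rl, for reference:

import numpy as np

def eps_greedy_action(q_values, eps=0.1):
    """With probability eps pick a random action, otherwise the greedy one."""
    if np.random.rand() < eps:
        return np.random.randint(len(q_values))
    return int(np.argmax(q_values))

def greedy_action(q_values):
    """Always pick the action with the largest estimated Q-value."""
    return int(np.argmax(q_values))

q = np.array([0.2, 1.3, -0.4])
print(eps_greedy_action(q))   # usually 1, occasionally a random index
print(greedy_action(q))       # always 1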

OpenAI Gym/openAI_planarCrane_train.py

+45 −39
@@ -35,40 +35,35 @@
 from keras.layers import Dense, Activation, Flatten
 from keras.optimizers import Adam

+from rl.agents.cem import CEMAgent
 from rl.agents.dqn import DQNAgent
+from rl.agents.sarsa import SarsaAgent
 from rl.policy import BoltzmannQPolicy, GreedyQPolicy, EpsGreedyQPolicy
-from rl.memory import SequentialMemory
+from rl.memory import SequentialMemory, EpisodeParameterMemory
 from rl.callbacks import FileLogger, ModelIntervalCheckpoint

 ENV_NAME = 'planar_crane-v0'

 LAYER_SIZE = 1024
 NUM_HIDDEN_LAYERS = 4
-NUM_STEPS = 100000
-DUEL_DQN = True
+NUM_STEPS = 50000
+METHOD = 'DUEL_DQN'    # can be DQN, DUEL_DQN, SARSA, or CEM
 TRIAL_ID = datetime.datetime.now().strftime('%Y-%m-%d_%H%M%S')

+# Define the filenames to use for this session
+WEIGHT_FILENAME = 'weights/{}_{}_weights_{}_{}_{}_{}.h5f'.format(METHOD, ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
+CHECKPOINT_WEIGHTS_FILENAME = 'weights/{}_{}_checkpointWeights_{{step}}_{}_{}_{}_{}.h5f'.format(METHOD, ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
+LOG_FILENAME = 'logs/{}_{}_log_{}_{}_{}_{}.json'.format(METHOD, ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
+MONITOR_FILENAME = 'example_data/{}_{}_monitor_{}_{}_{}_{}'.format(METHOD, ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
+
+
 # Get the environment and extract the number of actions.
 env = gym.make(ENV_NAME)

 # Record episode data?
 env.SAVE_DATA = False

-# uncomment to record data about the training session, including video if visualize is true
-
-# uncomment to record data about the training session, including video if visualize is true
-if DUEL_DQN:
-    MONITOR_FILENAME = 'example_data/duel_dqn_{}_monitor_{}_{}_{}_{}'.format(ENV_NAME,
-                                                                             LAYER_SIZE,
-                                                                             NUM_HIDDEN_LAYERS,
-                                                                             NUM_STEPS,
-                                                                             TRIAL_ID)
-else:
-    MONITOR_FILENAME = 'example_data/dqn_{}_monitor_{}_{}_{}_{}'.format(ENV_NAME,
-                                                                        LAYER_SIZE,
-                                                                        NUM_HIDDEN_LAYERS,
-                                                                        NUM_STEPS,
-                                                                        TRIAL_ID)
+# uncomment to record data about the training session, including video if video_callable is true
 env = gym.wrappers.Monitor(env, MONITOR_FILENAME, video_callable=False, force=True)

 np.random.seed(123)

@@ -91,50 +86,61 @@
 model.add(Activation('linear'))
 print(model.summary())

+
 # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
 # even the metrics!
-memory = SequentialMemory(limit=NUM_STEPS, window_length=1)
+
 # train_policy = BoltzmannQPolicy(tau=0.05)
 train_policy = EpsGreedyQPolicy()
 test_policy = GreedyQPolicy()

-if DUEL_DQN:
-    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
+# Compile the agent based on method specified. We use .upper() to convert to
+# upper case for comparison
+if METHOD.upper() == 'DUEL_DQN':
+    memory = SequentialMemory(limit=NUM_STEPS, window_length=1)
+    agent = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
                      enable_dueling_network=True, dueling_type='avg', target_model_update=1e-2,
                      policy=train_policy, test_policy=test_policy)
-
-    filename = 'weights/duel_dqn_{}_weights_{}_{}_{}_{}.h5f'.format(ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
-    checkpoint_weights_filename = 'logs/duel_dqn_{}_checkpointWeights_{{step}}_{}_{}_{}_{}.h5f'.format(ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
-    log_filename = 'logs/duel_dqn_{}_log_{}_{}_{}_{}.json'.format(ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
+    agent.compile(Adam(lr=1e-3), metrics=['mae'])

-else:
-    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
+elif METHOD.upper() == 'DQN':
+    memory = SequentialMemory(limit=NUM_STEPS, window_length=1)
+    agent = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
                      target_model_update=1e-2, policy=train_policy, test_policy=test_policy)
-
-    filename = 'weights/dqn_{}_weights_{}_{}_{}_{}.h5f'.format(ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
-    checkpoint_weights_filename = 'weights/dqn_{}_checkpointWeights_{{step}}_{}_{}_{}_{}.h5f'.format(ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
-    log_filename = 'logs/dqn_{}_log_{}_{}_{}_{}.json'.format(ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
+    agent.compile(Adam(lr=1e-3), metrics=['mae'])

+elif METHOD.upper() == 'SARSA':
+    # SARSA does not require a memory.
+    agent = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10, policy=train_policy)
+    agent.compile(Adam(lr=1e-3), metrics=['mae'])
+
+elif METHOD.upper() == 'CEM':
+    memory = EpisodeParameterMemory(limit=1000, window_length=1)
+    agent = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
+                     batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)
+    agent.compile()
+
+else:
+    raise('Please select DQN, DUEL_DQN, SARSA, or CEM for your method type.')


-dqn.compile(Adam(lr=1e-3), metrics=['mae'])

 callbacks = []
-callbacks += [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=10000)]
-callbacks += [FileLogger(log_filename, interval=100)]
+# callbacks += [ModelIntervalCheckpoint(CHECKPOINT_WEIGHTS_FILENAME, interval=10000)]
+callbacks += [FileLogger(LOG_FILENAME, interval=100)]

 # Optionally, we can reload a previous model's weights and continue training from there
-# WEIGHTS_FILENAME = 'weights/duel_dqn_planar_crane-v0_weights_1024_4_50000_2017-07-12_160853.h5f'
+# LOAD_WEIGHTS_FILENAME = 'weights/duel_dqn_planar_crane-v0_weights_1024_4_50000_2017-07-12_160853.h5f'
 # # # Load the model weights
-# dqn.load_weights(WEIGHTS_FILENAME)
+# agent.load_weights(LOAD_WEIGHTS_FILENAME)

 # Okay, now it's time to learn something! We visualize the training here for show, but this
 # slows down training quite a lot. You can always safely abort the training prematurely using
 # Ctrl + C.
-dqn.fit(env, nb_steps=NUM_STEPS, callbacks=callbacks, visualize=False, verbose=1, nb_max_episode_steps=500)
+agent.fit(env, nb_steps=NUM_STEPS, callbacks=callbacks, visualize=False, verbose=1, nb_max_episode_steps=500)

 # After training is done, we save the final weights.
-dqn.save_weights(filename, overwrite=True)
+agent.save_weights(WEIGHT_FILENAME, overwrite=True)

 # Finally, evaluate our algorithm for 5 episodes.
-# dqn.test(env, nb_episodes=5, nb_max_episode_steps=500, visualize=True)
+agent.test(env, nb_episodes=5, nb_max_episode_steps=500, visualize=True)
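The network those hyperparameters describe is built earlier in the script and is not part of this hunk. Below is a minimal sketch of a model consistent with LAYER_SIZE, NUM_HIDDEN_LAYERS, and the model.add(Activation('linear')) / print(model.summary()) lines shown above; the exact construction in the file may differ:

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten

# env, nb_actions, LAYER_SIZE, and NUM_HIDDEN_LAYERS are assumed to be defined earlier in the script
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
for _ in range(NUM_HIDDEN_LAYERS):
    model.add(Dense(LAYER_SIZE))
    model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())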

OpenAI Gym/planar_crane/planar_crane.py

+0 −3
@@ -118,9 +118,6 @@ def _step(self, action):
         if np.abs(distance_to_target) >= 0.01:
             reward = -1.0 - 10*theta**2 - 0.1*self.x_accel**2 - limits*10
         else:
-            # if self.x_accel**2 < 1:
-            #     reward = 100000.0
-            # else:
             reward = 1000.0
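Restated outside the diff markup, this is the reward logic that remains after the commit drops the commented-out acceleration bonus. The variable meanings used in the comments (theta as payload swing angle, x_accel as trolley acceleration, limits as a constraint-violation flag) are inferred from the names, not documented in this hunk:

import numpy as np

def planar_crane_reward(distance_to_target, theta, x_accel, limits):
    """Shaped penalty away from the target, flat +1000 bonus once within 1 cm."""
    if np.abs(distance_to_target) >= 0.01:
        # constant -1 per step, plus penalties on swing angle, control effort, and limit hits
        return -1.0 - 10 * theta**2 - 0.1 * x_accel**2 - limits * 10
    else:
        return 1000.0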
