Skip to content

Commit 5debfd0

Browse files
committed
Updates to OpenAI mass-spring-damper system and training
1 parent de80693 commit 5debfd0

File tree

2 files changed

+28
-19
lines changed

2 files changed

+28
-19
lines changed

Diff for: OpenAI Gym/mass_spring_damper_continuous/mass_spring_damper_continuous.py

+24-15
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
import datetime # for unique filenames
3232

3333
# Import the ODE solver
34-
from scipy.integrate import odeint
34+
from scipy.integrate import solve_ivp
3535

3636
logger = logging.getLogger(__name__)
3737

@@ -87,7 +87,7 @@ def __init__(self):
8787
self.done = False
8888
self.force = 0.0
8989

90-
def eq_of_motion(self, w, t):
90+
def eq_of_motion(self, t, w):
9191
"""
9292
Defines the differential equations for the coupled spring-mass system.
9393
@@ -125,15 +125,23 @@ def _step(self, action):
125125
x0 = [x1, x1_dot, x2, x2_dot]
126126

127127
# Call the ODE solver.
128-
resp = odeint(self.eq_of_motion,
129-
x0,
130-
[0, self.tau],
131-
hmax=self.max_step,
132-
atol=self.abserr,
133-
rtol=self.relerr)
128+
solution = solve_ivp(self.eq_of_motion,
129+
[0, self.tau],
130+
x0,
131+
max_step=self.max_step,
132+
atol=self.abserr,
133+
rtol=self.relerr)
134134

135-
print(resp)
135+
resp = solution.y
136136

137+
x1 = resp[0, -1]
138+
x1_dot = resp[1, -1]
139+
x2 = resp[2, -1]
140+
x2_dot = resp[3, -1]
141+
142+
# TODO: 05/08/18 - JEV -
143+
# Remove after testing. Using a proper ODE solver as above is the more
144+
# scalable approach
137145
# Update m1 states
138146
# x1_accel = 1 / self.m1 * (self.k * (x2 - x1 - self.spring_equil) +
139147
# self.c * (x2_dot - x1_dot) +
@@ -158,11 +166,8 @@ def _step(self, action):
158166
# x2_dot = np.clip(x2_dot, -self.mass_vel_threshold, self.mass_vel_threshold)
159167
#
160168
# x2 = x2 + self.tau * x2_dot
161-
162-
x1 = resp[-1, 0]
163-
x1_dot = resp[-1, 0]
164-
x2 = resp[-1, 2]
165-
x2_dot = resp[-1, 3]
169+
# End of block to remove after testing
170+
166171

167172
self.state = (x1, x1_dot, x2, x2_dot)
168173

@@ -186,14 +191,18 @@ def _step(self, action):
186191
distance_to_target = self.desired_position - x2
187192

188193
# clip the reward to the range [-1, 1]
189-
reward = np.clip(-10*distance_to_target**2 - self.force**2, -10, 1)
194+
reward = np.clip(-10*distance_to_target**2 - 0.01*self.force**2, -1, 1)
190195

191196
if self.SAVE_DATA:
192197
current_data = np.array([self.counter * self.tau, x1, x1_dot, x2, x2_dot, self.force, reward])
193198
self.episode_data[self.counter, :] = current_data
194199

195200
if self.counter >= self.MAX_STEPS:
196201
self.done = True
202+
203+
if limits:
204+
reward = -1000
205+
self.done = True
197206

198207
if self.done == True and self.SAVE_DATA:
199208
header = header='Time (s), x1 (m), x1_dot (m/s), x2 (m), x2_dot (m/s), Force (N), Reward'

Diff for: OpenAI Gym/openAI_massSpringContinuous_train.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,9 @@
4343

4444
ENV_NAME = 'mass_spring_damper_continuous-v0'
4545

46-
LAYER_SIZE = 32
46+
LAYER_SIZE = 512
4747
NUM_HIDDEN_LAYERS = 3
48-
NUM_STEPS = 10000
48+
NUM_STEPS = 1000000
4949
TRIAL_ID = datetime.datetime.now().strftime('%Y-%m-%d_%H%M%S')
5050

5151
# Get the environment and extract the number of actions.
@@ -65,7 +65,7 @@
6565
NUM_STEPS,
6666
TRIAL_ID)
6767

68-
# env = gym.wrappers.Monitor(env, MONITOR_FILENAME, video_callable=False, force=True)
68+
env = gym.wrappers.Monitor(env, MONITOR_FILENAME, video_callable=False, force=True)
6969

7070

7171

@@ -148,4 +148,4 @@
148148
agent.save_weights(filename, overwrite=True)
149149

150150
# Finally, evaluate our algorithm (the test call below does not set an episode count).
151-
agent.test(env, visualize=True) #nb_max_episode_steps=500,
151+
# agent.test(env, visualize=True) #nb_max_episode_steps=500,

0 commit comments

Comments
 (0)