Skip to content

Commit aff7c79

Browse files
committed
Updates to mass-spring-damper OpenAI Gym environment
1 parent 7a314dd commit aff7c79

5 files changed

+74
-26
lines changed

OpenAI Gym/mass_spring_damper_continuous/mass_spring_damper_continuous.py

+70-22
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@
3030
import numpy as np
3131
import datetime # for unique filenames
3232

33+
# Import the ODE solver
34+
from scipy.integrate import solve_ivp
35+
3336
logger = logging.getLogger(__name__)
3437

3538

@@ -57,6 +60,12 @@ def __init__(self):
5760
# Define thesholds for trial limits, penalized heavily for exceeding these
5861
self.mass_pos_threshold = 4.0 # max mass position (m)
5962
self.mass_vel_threshold = 0.5 # max mass velocity (m/s)
63+
64+
# Set up solver parameters
65+
# ODE solver parameters
66+
self.abserr = 1.0e-9
67+
self.relerr = 1.0e-9
68+
self.max_step = 0.1
6069

6170
# This action space is the range of acceleration of the trolley
6271
self.action_space = spaces.Box(low=-self.max_force,
@@ -77,43 +86,82 @@ def __init__(self):
7786
self.state = None
7887
self.done = False
7988
self.force = 0.0
89+
90+
def eq_of_motion(self, t, w):
91+
"""
92+
Defines the differential equations for the coupled spring-mass system.
93+
94+
Arguments:
95+
w : vector of the state variables:
96+
t : time
97+
"""
98+
99+
x1 = w[0]
100+
x1_dot = w[1]
101+
x2 = w[2]
102+
x2_dot = w[3]
103+
104+
# Create sysODE = (x', x_dot', y', y_dot')
105+
sysODE = np.array([x1_dot,
106+
1 / self.m1 * (self.k * (x2 - x1 - self.spring_equil) + self.c * (x2_dot - x1_dot) + self.force),
107+
x2_dot,
108+
1 / self.m2 * (-self.k * (x2 - x1 - self.spring_equil) + -self.c * (x2_dot - x1_dot))])
109+
110+
return sysODE
80111

81112
def _seed(self, seed=None):
82113
self.np_random, seed = seeding.np_random(seed)
83114
return [seed]
84115

116+
85117
def _step(self, action):
86118
x1, x1_dot, x2, x2_dot = self.state
87119
self.counter = self.counter + 1
88120

89121
# Get the action and clip it to the min/max trolley accel
90122
self.force = np.clip(action[0], -self.max_force, self.max_force)
91123

92-
93-
# Update m1 states
94-
x1_accel = 1 / self.m1 * (self.k * (x2 - x1 - self.spring_equil) +
95-
self.c * (x2_dot - x1_dot) +
96-
self.force)
97-
98-
99-
x1_dot = x1_dot + self.tau * x1_accel
100-
101-
# Get the action and clip it to the min/max m1 vel
102-
x1_dot = np.clip(x1_dot, -self.mass_vel_threshold, self.mass_vel_threshold)
103-
104-
x1 = x1 + self.tau * x1_dot
124+
x0 = [x1, x1_dot, x2, x2_dot]
105125

106-
# Update m2 states
107-
x2_accel = 1 / self.m2 * (-self.k * (x2 - x1 - self.spring_equil) +
108-
-self.c * (x2_dot - x1_dot))
109-
110-
111-
x2_dot = x2_dot + self.tau * x2_accel
126+
# Call the ODE solver.
127+
solution = solve_ivp(self.eq_of_motion,
128+
[0, self.tau],
129+
x0,
130+
max_step=self.max_step,
131+
atol=self.abserr,
132+
rtol=self.relerr)
133+
134+
resp = solution.y
112135

113-
# Get the action and clip it to the min/max m2 accel
114-
x2_dot = np.clip(x2_dot, -self.mass_vel_threshold, self.mass_vel_threshold)
136+
# Update m1 states
137+
# x1_accel = 1 / self.m1 * (self.k * (x2 - x1 - self.spring_equil) +
138+
# self.c * (x2_dot - x1_dot) +
139+
# self.force)
140+
#
141+
#
142+
# x1_dot = x1_dot + self.tau * x1_accel
143+
#
144+
# # Get the action and clip it to the min/max m1 vel
145+
# x1_dot = np.clip(x1_dot, -self.mass_vel_threshold, self.mass_vel_threshold)
146+
#
147+
# x1 = x1 + self.tau * x1_dot
148+
#
149+
# # Update m2 states
150+
# x2_accel = 1 / self.m2 * (-self.k * (x2 - x1 - self.spring_equil) +
151+
# -self.c * (x2_dot - x1_dot))
152+
#
153+
#
154+
# x2_dot = x2_dot + self.tau * x2_accel
155+
#
156+
# # Get the action and clip it to the min/max m2 accel
157+
# x2_dot = np.clip(x2_dot, -self.mass_vel_threshold, self.mass_vel_threshold)
158+
#
159+
# x2 = x2 + self.tau * x2_dot
115160

116-
x2 = x2 + self.tau * x2_dot
161+
x1 = resp[0, -1]
162+
x1_dot = resp[1, -1]
163+
x2 = resp[2, -1]
164+
x2_dot = resp[3, -1]
117165

118166
self.state = (x1, x1_dot, x2, x2_dot)
119167

OpenAI Gym/openAI_massSpringContinuous_episodeDataProcessing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
import numpy as np
2626
import matplotlib.pyplot as plt
2727

28-
FILENAME = 'example_data/EpisodeData_2018-04-08_212624.csv'
28+
FILENAME = 'example_data/EpisodeData_2018-04-27_174916.csv'
2929

3030
# Files have data saved as:
3131
# Time (s), Angle (rad), Angle (rad/s), Trolley Pos (m), Trolly Vel (m/s), Trolley Accel (m/s^2), Reward

OpenAI Gym/openAI_massSpringContinuous_randomAction.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
###############################################################################
2323

2424
import numpy as np
25-
import matplotlib.pyplot as plt
25+
# import matplotlib.pyplot as plt
2626

2727
import gym
2828
import time

OpenAI Gym/openAI_massSpringContinuous_test.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444

4545
LAYER_SIZE = 32
4646
NUM_HIDDEN_LAYERS = 3
47-
NUM_STEPS = 100000
47+
NUM_STEPS = 10000
4848
TRIAL_ID = datetime.datetime.now().strftime('%Y-%m-%d_%H%M%S')
4949

5050
# TODO: Add file picker GUI - For now, look for files with the format below

OpenAI Gym/openAI_massSpringContinuous_train.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545

4646
LAYER_SIZE = 32
4747
NUM_HIDDEN_LAYERS = 3
48-
NUM_STEPS = 500000
48+
NUM_STEPS = 10000
4949
TRIAL_ID = datetime.datetime.now().strftime('%Y-%m-%d_%H%M%S')
5050

5151
# Get the environment and extract the number of actions.

0 commit comments

Comments
 (0)