Commit 7a314dd

Added two-mass-spring-damper OpenAI gym environment and test scripts

1 parent 2339c3c · commit 7a314dd

15 files changed (+933, -77 lines)

OpenAI Gym/keras-rl_FileLogger_processing.py (+2, -1)
@@ -28,7 +28,7 @@
 import json          # the data files generated are json

 # TODO: 07/13/17 - JEV - Add GUI, argparse, or CLI for selecting file
-FILENAME = 'logs/ddpg_planar_crane_continuous-v0_log_2048_3_100000_2017-07-14_175832.json'
+FILENAME = 'logs/ddpg_planar_crane_continuous-v0_log_32_3_100000_2017-07-20_012022.json'


@@ -47,6 +47,7 @@
 episode_steps = np.array(data['nb_episode_steps'])
 cumulative_steps = np.array(data['nb_steps'])

+# time =

 # ---- Plot evolution of reward over episodes --------------------------------
 # Set the plot size - 3x2 aspect ratio is best
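
For context, this script reads the JSON file written by keras-rl's FileLogger callback and plots training progress. A minimal sketch of that pattern follows; the 'episode_reward' key is an assumption (only 'nb_episode_steps' and 'nb_steps' appear in the hunk above), and plots are saved as PDFs per the script's own convention:

import json
import numpy as np
import matplotlib.pyplot as plt

FILENAME = 'logs/ddpg_planar_crane_continuous-v0_log_32_3_100000_2017-07-20_012022.json'

with open(FILENAME, 'r') as log_file:
    data = json.load(log_file)                        # dict of lists keyed by metric name

episode_steps = np.array(data['nb_episode_steps'])    # steps in each episode
cumulative_steps = np.array(data['nb_steps'])         # total steps after each episode
episode_reward = np.array(data['episode_reward'])     # assumed key, not shown in the diff

plt.plot(episode_reward)
plt.xlabel('Episode')
plt.ylabel('Episode Reward')
plt.savefig('episode_reward_evolution.pdf')           # save for output rather than viewing on screen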
__init__.py (new file, +29)

@@ -0,0 +1,29 @@
#! /usr/bin/env python

###############################################################################
# __init__.py
#
# Initialization for the mass_spring_damper_continuous OpenAI Gym environment
#
# NOTE: Any plotting is set up for output, not viewing on screen.
#       So, it will likely be ugly on screen. The saved PDFs should look
#       better.
#
# Created: 04/08/18
#          - Joshua Vaughan
#          - http://www.ucs.louisiana.edu/~jev9637
#
# Modified:
#   *
#
# TODO:
#   *
###############################################################################

from gym.envs.registration import register

register(
    id='mass_spring_damper_continuous-v0',
    entry_point='mass_spring_damper_continuous.mass_spring_damper_continuous:MassSpringDamperContEnv',
)
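
Once this package directory is importable, the environment can be created by its registered id. A minimal sketch (not part of this commit), using the pre-0.26 gym API that the rest of this code targets:

import gym
import mass_spring_damper_continuous    # importing the package runs the register() call above

env = gym.make('mass_spring_damper_continuous-v0')

observation = env.reset()
action = env.action_space.sample()       # random force on m1, within +/- max_force
observation, reward, done, info = env.step(action)
env.close()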
mass_spring_damper_continuous.py (new file, +284)

@@ -0,0 +1,284 @@
#! /usr/bin/env python

###############################################################################
# mass_spring_damper_continuous.py
#
# Defines an OpenAI Gym environment for a two-mass-spring-damper system.
# This version has a continuous range of inputs for the m1 force input.
#
# NOTE: Any plotting is set up for output, not viewing on screen.
#       So, it will likely be ugly on screen. The saved PDFs should look
#       better.
#
# Created: 04/08/18
#          - Joshua Vaughan
#          - http://www.ucs.louisiana.edu/~jev9637
#
# Modified:
#   *
#
# TODO:
#   *
###############################################################################

import gym
from gym import spaces
from gym.utils import seeding
import logging
import numpy as np
import datetime                     # for unique filenames

logger = logging.getLogger(__name__)


class MassSpringDamperContEnv(gym.Env):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 50
    }

    def __init__(self):
        self.m1 = 1.0                   # mass of mass 1 (kg)
        self.m2 = 1.0                   # mass of mass 2 (kg)
        self.k = 2 * np.pi**2           # spring constant (N/m)
        self.spring_equil = 1.0         # equilibrium length of the spring (m)
        self.c = 0.0                    # damping coefficient (Ns/m)

        self.tau = 0.02                 # seconds between state updates
        self.counter = 0                # counter for number of steps
        self.desired_position = 0       # desired final position of the system
        self.max_force = 25.0           # maximum force allowed (N)
        self.SAVE_DATA = False          # set True to save episode data
        self.MAX_STEPS = 500            # maximum number of steps to run

        # Define thresholds for trial limits, penalized heavily for exceeding these
        self.mass_pos_threshold = 4.0   # max mass position (m)
        self.mass_vel_threshold = 0.5   # max mass velocity (m/s)

        # This action space is the range of force applied to mass 1
        self.action_space = spaces.Box(low=-self.max_force,
                                       high=self.max_force,
                                       shape=(1,))

        high_limit = np.array([self.mass_pos_threshold,    # max observable m1 position
                               self.mass_vel_threshold,    # max observable m1 velocity
                               self.mass_pos_threshold,    # max observable m2 position
                               self.mass_vel_threshold])   # max observable m2 velocity

        low_limit = -high_limit         # limits are symmetric about 0

        self.observation_space = spaces.Box(low_limit, high_limit)

        self._seed()
        self.viewer = None
        self.state = None
        self.done = False
        self.force = 0.0

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):
        x1, x1_dot, x2, x2_dot = self.state
        self.counter = self.counter + 1

        # Get the action and clip it to the min/max force on mass 1
        self.force = np.clip(action[0], -self.max_force, self.max_force)

        # Update m1 states
        x1_accel = 1 / self.m1 * (self.k * (x2 - x1 - self.spring_equil) +
                                  self.c * (x2_dot - x1_dot) +
                                  self.force)

        x1_dot = x1_dot + self.tau * x1_accel

        # Clip the velocity to the min/max m1 velocity
        x1_dot = np.clip(x1_dot, -self.mass_vel_threshold, self.mass_vel_threshold)

        x1 = x1 + self.tau * x1_dot

        # Update m2 states
        x2_accel = 1 / self.m2 * (-self.k * (x2 - x1 - self.spring_equil) +
                                  -self.c * (x2_dot - x1_dot))

        x2_dot = x2_dot + self.tau * x2_accel

        # Clip the velocity to the min/max m2 velocity
        x2_dot = np.clip(x2_dot, -self.mass_vel_threshold, self.mass_vel_threshold)

        x2 = x2 + self.tau * x2_dot

        self.state = (x1, x1_dot, x2, x2_dot)

        # Define a boolean on whether we're exceeding limits or not. We'll just penalize
        # any of these conditions identically in the reward function
        limits = x1 > self.mass_pos_threshold \
                 or x1 < -self.mass_pos_threshold \
                 or x1_dot > self.mass_vel_threshold \
                 or x1_dot < -self.mass_vel_threshold \
                 or x2 > self.mass_pos_threshold \
                 or x2 < -self.mass_pos_threshold \
                 or x2_dot > self.mass_vel_threshold \
                 or x2_dot < -self.mass_vel_threshold

        # TODO: 04/08/18 - This has *huge* effect on the outcome. Decide "optimal" reward scheme.
        # Use COM position
        # distance_to_target = self.desired_position - (self.m1 * x1 + self.m2 * x2) / (self.m1 + self.m2)

        # Use m2 position
        distance_to_target = self.desired_position - x2

        # Clip the reward to the range [-10, 1]
        reward = np.clip(-10 * distance_to_target**2 - self.force**2, -10, 1)

        if self.SAVE_DATA:
            current_data = np.array([self.counter * self.tau, x1, x1_dot, x2, x2_dot, self.force, reward])
            self.episode_data[self.counter, :] = current_data

        if self.counter >= self.MAX_STEPS:
            self.done = True

        if self.done and self.SAVE_DATA:
            header = 'Time (s), x1 (m), x1_dot (m/s), x2 (m), x2_dot (m/s), Force (N), Reward'
            data_filename = 'example_data/EpisodeData_{}.csv'.format(datetime.datetime.now().strftime('%Y-%m-%d_%H%M%S'))
            np.savetxt(data_filename, self.episode_data, header=header, delimiter=',')

        return np.array(self.state), reward, self.done, {}

    def _reset(self):
        # TODO: 07/07/17 - Probably need more randomness in initial conditions
        rand_pos = self.np_random.uniform(low=-3.0, high=3.0)

        self.state = np.array([rand_pos,
                               0,
                               rand_pos + self.spring_equil,
                               0])

        # Reset the counter and the data recorder array
        self.counter = 0

        # Reset the done flag
        self.done = False

        if self.SAVE_DATA:
            self.episode_data = np.zeros((self.MAX_STEPS + 1, 7))
            self.episode_data[0, :] = np.array([0, self.state[0], self.state[1], self.state[2], self.state[3], 0, 0])

        return np.array(self.state)

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return

        screen_width = 600
        screen_height = 400

        world_width = 1.5 * self.mass_pos_threshold
        scale = screen_width / world_width      # Scale according to width

        # Define the mass size and its offset from the bottom of the screen (pixels)
        mass_width = 30.0
        mass_height = 30.0
        mass_yOffset = screen_height / 2

        target_size = 10
        COM_size = 5

        x1, x1_dot, x2, x2_dot = self.state

        COM_position = (self.m1 * x1 + self.m2 * x2) / (self.m1 + self.m2)

        if self.viewer is None:     # Initial scene setup
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(screen_width, screen_height)

            # The target is a series of circles, a bullseye
            self.target = rendering.make_circle(target_size * 2)
            self.targettrans = rendering.Transform(translation=(screen_width/2 + self.desired_position * scale, mass_yOffset))
            self.target.add_attr(self.targettrans)
            self.target.set_color(1, 0, 0)      # red
            self.viewer.add_geom(self.target)

            self.target = rendering.make_circle(target_size * 1.25)
            self.targettrans = rendering.Transform(translation=(screen_width/2 + self.desired_position * scale, mass_yOffset))
            self.target.add_attr(self.targettrans)
            self.target.set_color(1, 1, 1)      # white
            self.viewer.add_geom(self.target)

            self.target = rendering.make_circle(target_size / 2)
            self.targettrans = rendering.Transform(translation=(screen_width/2 + self.desired_position * scale, mass_yOffset))
            self.target.add_attr(self.targettrans)
            self.target.set_color(1, 0, 0)      # red
            self.viewer.add_geom(self.target)

            # The COM marker is also a series of circles, a bullseye
            self.COM1 = rendering.make_circle(COM_size * 2)
            self.COM1trans = rendering.Transform(translation=(screen_width/2 + COM_position * scale, mass_yOffset))
            self.COM1.add_attr(self.COM1trans)
            self.COM1.set_color(0, 0, 0)        # black
            self.viewer.add_geom(self.COM1)

            self.COM2 = rendering.make_circle(COM_size * 1.25)
            self.COM2trans = rendering.Transform(translation=(screen_width/2 + COM_position * scale, mass_yOffset))
            self.COM2.add_attr(self.COM2trans)
            self.COM2.set_color(1, 1, 1)        # white
            self.viewer.add_geom(self.COM2)

            self.COM3 = rendering.make_circle(COM_size / 2)
            self.COM3trans = rendering.Transform(translation=(screen_width/2 + COM_position * scale, mass_yOffset))
            self.COM3.add_attr(self.COM3trans)
            self.COM3.set_color(0, 0, 0)        # black
            self.viewer.add_geom(self.COM3)

            # Define the polygons for the two masses
            l, r, t, b = -mass_width/2, mass_width/2, mass_height/2, -mass_height/2

            self.mass1 = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
            self.mass1trans = rendering.Transform(translation=(screen_width/2 + x1 * scale, mass_yOffset))
            self.mass1.add_attr(self.mass1trans)
            self.mass1.set_color(0.85, 0.85, 0.85)      # light gray
            self.viewer.add_geom(self.mass1)

            self.mass2 = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
            self.mass2trans = rendering.Transform(translation=(screen_width/2 + x2 * scale, mass_yOffset))
            self.mass2.add_attr(self.mass2trans)
            self.mass2.set_color(0.85, 0.85, 0.85)      # light gray
            self.viewer.add_geom(self.mass2)

            # This is a bar that shows the direction of the current force command
            l, r, t, b = -10.0, 10.0, 4, -4
            self.accel = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
            self.acceltrans = rendering.Transform(translation=(screen_width/2 + x1 * scale - mass_width/2, mass_yOffset))
            self.accel.add_attr(self.acceltrans)
            self.accel.set_color(0.1, 0.1, 0.5)
            self.viewer.add_geom(self.accel)

        # Move the masses and the COM marker
        self.mass1trans.set_translation(screen_width/2 + x1 * scale, mass_yOffset)
        self.mass2trans.set_translation(screen_width/2 + x2 * scale, mass_yOffset)
        self.COM1trans.set_translation(screen_width/2 + COM_position * scale, mass_yOffset)
        self.COM2trans.set_translation(screen_width/2 + COM_position * scale, mass_yOffset)
        self.COM3trans.set_translation(screen_width/2 + COM_position * scale, mass_yOffset)

        # Show the force direction
        accel_scaling = 1 / (25 * self.max_force) * self.force * scale
        self.acceltrans.set_translation(screen_width/2 + (x1 * scale + (accel_scaling / 2)), mass_yOffset)
        self.acceltrans.set_scale(accel_scaling, 1)

        return self.viewer.render(return_rgb_array=(mode == 'rgb_array'))
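
For reference, the Euler-type updates in _step above correspond to the standard equations of motion for a two-mass-spring-damper system, with the control force F applied to m1 and L = spring_equil (velocities are additionally clipped to mass_vel_threshold at each step):

\begin{aligned}
m_1 \ddot{x}_1 &=  k\,(x_2 - x_1 - L) + c\,(\dot{x}_2 - \dot{x}_1) + F \\
m_2 \ddot{x}_2 &= -k\,(x_2 - x_1 - L) - c\,(\dot{x}_2 - \dot{x}_1)
\end{aligned}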

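The commit message also mentions test scripts, which are not included in this excerpt. A hypothetical random-action rollout in the same pre-0.26 gym style, for illustration only and not the repository's actual test script:

import gym
import mass_spring_damper_continuous    # registers mass_spring_damper_continuous-v0

env = gym.make('mass_spring_damper_continuous-v0')

for episode in range(5):
    observation = env.reset()
    done = False
    episode_reward = 0.0

    while not done:                      # the environment sets done after MAX_STEPS (500) steps
        env.render()
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        episode_reward += reward

    print('Episode {} finished with total reward {:.2f}'.format(episode, episode_reward))

env.close()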