Skip to content

Commit e7bda1d

Browse files
committedJul 7, 2017
Initial commit of OpenAI Gym environment for variable length pendulum
1 parent 490566b commit e7bda1d

File tree

4 files changed

+710
-0
lines changed

4 files changed

+710
-0
lines changed
 

‎OpenAI Gym/Variable_Length_Pendulum.ipynb

+447
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
#! /usr/bin/env python

###############################################################################
# openAI_variableLengthPendulum.py
#
# Script to run the variable-length pendulum OpenAI Gym environment,
# taking random actions and printing the resulting state to the terminal.
#
# NOTE: Any plotting is set up for output, not viewing on screen.
#       So, it will likely be ugly on screen. The saved PDFs should look
#       better.
#
# Created: 07/07/17
#   - Joshua Vaughan
#   - joshua.vaughan@louisiana.edu
#   - http://www.ucs.louisiana.edu/~jev9637
###############################################################################

import numpy as np
import matplotlib.pyplot as plt

import gym
import variable_pendulum

# Formatting used when printing the state to the terminal
LABEL_WIDTH = 40   # pad each label out to this many characters
FILL_CHAR = '.'    # character used to fill the padding


def _show_status(observation, reward):
    """Clear the terminal and pretty-print the current state and reward.

    observation is the environment's state vector; angles are converted
    from radians to degrees for display.
    """
    print("\033[2J\033[;H")  # Clear the terminal each time
    print("Theta (deg).:".ljust(LABEL_WIDTH, FILL_CHAR), '{:+8.3f}'.format(observation[0]*180/np.pi))
    print("Theta_dot (deg/s).:".ljust(LABEL_WIDTH, FILL_CHAR), '{:+8.3f}'.format(observation[1]*180/np.pi))
    print("L (m).:".ljust(LABEL_WIDTH, FILL_CHAR), '{:+8.3f}'.format(observation[2]))
    print("L_dot (m/s).:".ljust(LABEL_WIDTH, FILL_CHAR), '{:+8.3f}'.format(observation[3]))
    print("Reward:".ljust(LABEL_WIDTH, FILL_CHAR), '{:+8.3f}'.format(reward))


env = gym.make('variable_pendulum-v0')

# run 5 episodes of 1000 timesteps, taking random actions at each step
for episode in range(5):
    observation = env.reset()

    for t in range(1000):
        env.render()

        # just randomly choose an action
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)

        # Finally, print the updated state of the system
        _show_status(observation, reward)

        if done:
            print("Episode finished after {} timesteps".format(t+1))
            break
+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
#! /usr/bin/env python

###############################################################################
# __init__.py
#
# Package initialization for the variable_pendulum OpenAI Gym environment.
# Registers the environment with Gym so that gym.make() can construct it
# by id.
#
# NOTE: Any plotting is set up for output, not viewing on screen.
#       So, it will likely be ugly on screen. The saved PDFs should look
#       better.
#
# Created: 07/07/17
#   - Joshua Vaughan
#   - joshua.vaughan@louisiana.edu
#   - http://www.ucs.louisiana.edu/~jev9637
###############################################################################

from gym.envs.registration import register

# The id is what callers pass to gym.make(); the entry point is the
# "module:class" path Gym imports to build the environment instance.
ENV_ID = 'variable_pendulum-v0'
ENV_ENTRY_POINT = 'variable_pendulum.variable_pendulum:VariablePendulumEnv'

register(
    id=ENV_ID,
    entry_point=ENV_ENTRY_POINT,
)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
#! /usr/bin/env python
2+
3+
###############################################################################
4+
# variable_pendulum.py
5+
#
6+
# Defines a variable-length pendulum environment for use with the openAI Gym.
7+
#
8+
# NOTE: Any plotting is set up for output, not viewing on screen.
9+
# So, it will likely be ugly on screen. The saved PDFs should look
10+
# better.
11+
#
12+
# Created: 07/07/17
13+
# - Joshua Vaughan
14+
# - joshua.vaughan@louisiana.edu
15+
# - http://www.ucs.louisiana.edu/~jev9637
16+
#
17+
# Modified:
18+
# *
19+
#
20+
# TODO:
21+
# *
22+
###############################################################################
23+
24+
25+
26+
import gym
27+
from gym import spaces
28+
from gym.utils import seeding
29+
import logging
30+
import numpy as np
31+
32+
logger = logging.getLogger(__name__)
33+
34+
35+
class VariablePendulumEnv(gym.Env):
    """OpenAI Gym environment for a planar pendulum whose cable length can
    be changed by hoisting the payload up or down.

    State vector (also the observation):
        [theta, theta_dot, l, l_dot]
    where theta is the swing angle (rad), theta_dot its rate (rad/s),
    l the cable length (m), and l_dot the hoisting speed (m/s).

    Action space is Discrete(3): hoist down, do nothing, hoist up, which
    map to cable accelerations of -MAX_CABLE_ACCEL, 0, +MAX_CABLE_ACCEL.

    The episode ends when the cable length leaves [l_min_threshold,
    l_max_threshold] or the swing angle exceeds +/- theta_threshold.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second' : 50
    }

    # actions available: hoist down, do nothing, hoist up (m/s^2)
    MAX_CABLE_ACCEL = 1.0
    AVAIL_CABLE_ACCEL = [-MAX_CABLE_ACCEL, 0, MAX_CABLE_ACCEL]

    def __init__(self):
        self.gravity = 9.8            # accel. due to gravity (m/s^2)
        self.masspend = 1.0           # mass of the pendulum point mass (kg)
        self.max_cable_accel = 0.25   # maximum acceleration of cable (m/s^2)
        self.tau = 0.02               # seconds between state updates

        # Define thresholds for failing the episode
        # NOTE(review): the original comment claimed "+/- 45 degree limit",
        # but 45*pi/360 is 22.5 degrees in radians (45 deg would be
        # 45*pi/180). Value kept as-is to preserve behavior -- confirm
        # which was intended.
        self.theta_threshold = 45 * np.pi / 360  # swing-angle limit (rad)
        self.l_max_threshold = 3.0               # max cable length (m)
        self.l_min_threshold = 0.5               # min cable length (m)

        # A continuous action space between +/- max_cable_accel could be
        # used instead:
        #self.action_space = spaces.Box(-self.max_cable_accel, self.max_cable_accel, shape = (1,))

        # This action space is just hoist down, do nothing, hoist up
        self.action_space = spaces.Discrete(3)

        high_limit = np.array([2*self.theta_threshold,     # max observable angle
                               10*2*self.theta_threshold,  # max observable angular vel.
                               10,                         # max observable length
                               2])                         # max observable cable vel

        low_limit = np.array([-2*self.theta_threshold,     # min observable angle
                              -10*2*self.theta_threshold,  # min observable angular vel.
                              0,                           # min observable length
                              -2])                         # min observable cable vel

        # BUG FIX: spaces.Box takes (low, high); the original passed
        # (high_limit, low_limit), inverting the observation space bounds.
        self.observation_space = spaces.Box(low_limit, high_limit)

        self._seed()
        self.viewer = None
        self.state = None

        self.steps_beyond_done = None

    def _seed(self, seed=None):
        """Seed the environment's random number generator.

        Returns the list of seeds used, per the Gym convention.
        """
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):
        """Advance the simulation one timestep of length self.tau.

        action: index into AVAIL_CABLE_ACCEL (0 = hoist down, 1 = nothing,
                2 = hoist up).

        Returns (observation, reward, done, info). The reward is the
        negative absolute swing angle while running, 0 once done.
        """
        assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action))

        theta, theta_dot, l, l_dot = self.state

        cable_accel = self.AVAIL_CABLE_ACCEL[action]

        # Equation of motion for a pendulum with time-varying length:
        #   theta_ddot = -(2*l_dot/l)*theta_dot - (g/l)*sin(theta)
        # BUG FIX: the original omitted the factor of 2 on the Coriolis
        # term (-l_dot/l * theta_dot).
        theta_ddot = -2 * l_dot/l * theta_dot - self.gravity/l * np.sin(theta)
        l_ddot = cable_accel

        # Explicit (forward) Euler integration of the state
        theta = theta + self.tau * theta_dot
        theta_dot = theta_dot + self.tau * theta_ddot
        l = l + self.tau * l_dot
        l_dot = l_dot + self.tau * l_ddot
        self.state = (theta, theta_dot, l, l_dot)

        # Episode fails when the cable leaves its length limits or the
        # swing angle exceeds the threshold
        done = bool(l > self.l_max_threshold
                    or l < self.l_min_threshold
                    or theta < -self.theta_threshold
                    or theta > self.theta_threshold)

        if not done:
            reward = -np.abs(theta) # a negative award for nonzero angles
        else:
            # if self.steps_beyond_done == 0:
            #     logger.warning("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
            # self.steps_beyond_done += 1
            reward = 0.0

        return np.array(self.state), reward, done, {}

    def _reset(self):
        """Reset to a nominal state (10 deg swing, 2 m cable, at rest)
        plus uniform noise in [-0.1, 0.1) on each component.
        """
        # self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
        self.state = np.array([10*np.pi/180, 0, 2, 0]) + self.np_random.uniform(low=-0.1, high=0.1, size=(4,))
        self.steps_beyond_done = None
        return np.array(self.state)

    def _render(self, mode='human', close=False):
        """Draw the pendulum: a rotating, length-scaled cable polygon
        pinned near the top of the window, with a circular payload.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return

        screen_width = 600
        screen_height = 400

        world_width = 2 * self.l_max_threshold# * np.sin(self.theta_threshold)
        scale = screen_width/world_width
        cable_pin = screen_height - 10  # y-coordinate of the pivot (px)
        payload_size = 10.0
        cable_width = 2.0

        theta, theta_dot, l, l_dot = self.state

        if self.viewer is None:
            self.l_init = l # save the initial length for scaling cable
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(screen_width, screen_height)

            # define the cable as a polygon, so we can change its length later
            # BUG FIX: the original unpacked the corners into (l, r, t, b),
            # clobbering the cable length l and corrupting the payload
            # position computed below on the first rendered frame.
            left, right = -cable_width/2, cable_width/2
            top, bottom = cable_width/2, -l*scale - cable_width/2
            self.cable = rendering.FilledPolygon([(left, bottom), (left, top),
                                                  (right, top), (right, bottom)])
            self.cabletrans = rendering.Transform(translation=(screen_width/2, cable_pin))
            self.cable.add_attr(self.cabletrans)
            self.cable.set_color(0.25,0.25,0.25) # dark gray
            self.viewer.add_geom(self.cable)

            # the payload is a circle.
            self.payload = rendering.make_circle(payload_size)
            self.payloadtrans = rendering.Transform(translation=(screen_width/2, cable_pin-l*scale))
            self.payload.add_attr(self.payloadtrans)
            self.payload.set_color(0.5,0.5,0.5) # medium gray
            self.viewer.add_geom(self.payload)

        if self.state is None:
            return None

        # calculate the payload position in the window, then move it there
        payload_screen_x = screen_width/2 + l*np.sin(theta)*scale
        payload_screen_y = cable_pin - l*np.cos(theta)*scale
        self.payloadtrans.set_translation(payload_screen_x, payload_screen_y)

        # rotate the cable about the pivot
        self.cabletrans.set_rotation(theta)

        # change its length by scaling its length relative to its initial length
        self.cabletrans.set_scale(1, l/self.l_init)

        return self.viewer.render(return_rgb_array = mode=='rgb_array')

0 commit comments

Comments
 (0)
Please sign in to comment.