Skip to content

Commit e7bda1d

Browse files
committedJul 7, 2017
Initial commit of OpenAI Gym environment for variable length pendulum
1 parent 490566b commit e7bda1d

File tree

4 files changed

+710
-0
lines changed

4 files changed

+710
-0
lines changed
 

‎OpenAI Gym/Variable_Length_Pendulum.ipynb

+447
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
#! /usr/bin/env python

###############################################################################
# openAI_variableLengthPendulum.py
#
# Script to run the variable-length pendulum OpenAI Gym environment,
# taking random actions and printing the resulting state to the terminal.
#
# NOTE: Any plotting is set up for output, not viewing on screen.
#       So, it will likely be ugly on screen. The saved PDFs should look
#       better.
#
# Created: 07/07/17
#   - Joshua Vaughan
#   - joshua.vaughan@louisiana.edu
#   - http://www.ucs.louisiana.edu/~jev9637
###############################################################################

import numpy as np
import matplotlib.pyplot as plt

import gym
import variable_pendulum

# Formatting used when printing the state to the terminal
LABEL_WIDTH = 40   # pad each label out to this many characters
FILL_CHAR = '.'    # character used to fill the padding


def _show_status(observation, reward):
    """Clear the terminal and pretty-print the current state and reward.

    observation is the environment's state vector; angles are converted
    from radians to degrees for display.
    """
    print("\033[2J\033[;H")  # Clear the terminal each time
    print("Theta (deg).:".ljust(LABEL_WIDTH, FILL_CHAR), '{:+8.3f}'.format(observation[0]*180/np.pi))
    print("Theta_dot (deg/s).:".ljust(LABEL_WIDTH, FILL_CHAR), '{:+8.3f}'.format(observation[1]*180/np.pi))
    print("L (m).:".ljust(LABEL_WIDTH, FILL_CHAR), '{:+8.3f}'.format(observation[2]))
    print("L_dot (m/s).:".ljust(LABEL_WIDTH, FILL_CHAR), '{:+8.3f}'.format(observation[3]))
    print("Reward:".ljust(LABEL_WIDTH, FILL_CHAR), '{:+8.3f}'.format(reward))


env = gym.make('variable_pendulum-v0')

# run 5 episodes of 1000 timesteps, taking random actions at each step
for episode in range(5):
    observation = env.reset()

    for t in range(1000):
        env.render()

        # just randomly choose an action
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)

        # Finally, print the updated state of the system
        _show_status(observation, reward)

        if done:
            print("Episode finished after {} timesteps".format(t+1))
            break
+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
#! /usr/bin/env python

###############################################################################
# __init__.py
#
# Package initialization for the variable_pendulum OpenAI Gym environment.
# Registers the environment with Gym so that gym.make() can construct it
# by id.
#
# NOTE: Any plotting is set up for output, not viewing on screen.
#       So, it will likely be ugly on screen. The saved PDFs should look
#       better.
#
# Created: 07/07/17
#   - Joshua Vaughan
#   - joshua.vaughan@louisiana.edu
#   - http://www.ucs.louisiana.edu/~jev9637
###############################################################################

from gym.envs.registration import register

# The id is what callers pass to gym.make(); the entry point is the
# "module:class" path Gym imports to build the environment instance.
ENV_ID = 'variable_pendulum-v0'
ENV_ENTRY_POINT = 'variable_pendulum.variable_pendulum:VariablePendulumEnv'

register(
    id=ENV_ID,
    entry_point=ENV_ENTRY_POINT,
)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
#! /usr/bin/env python
2+
3+
###############################################################################
4+
# variable_pendulum.py
5+
#
6+
# Defines a variable-length pendulum environment for use with the openAI Gym.
7+
#
8+
# NOTE: Any plotting is set up for output, not viewing on screen.
9+
# So, it will likely be ugly on screen. The saved PDFs should look
10+
# better.
11+
#
12+
# Created: 07/07/17
13+
# - Joshua Vaughan
14+
# - joshua.vaughan@louisiana.edu
15+
# - http://www.ucs.louisiana.edu/~jev9637
16+
#
17+
# Modified:
18+
# *
19+
#
20+
# TODO:
21+
# *
22+
###############################################################################
23+
24+
25+
26+
import gym
27+
from gym import spaces
28+
from gym.utils import seeding
29+
import logging
30+
import numpy as np
31+
32+
logger = logging.getLogger(__name__)
33+
34+
35+
class VariablePendulumEnv(gym.Env):
    """OpenAI Gym environment for a planar pendulum whose cable length can
    be changed by hoisting the payload up or down.

    State vector (also the observation):
        [theta, theta_dot, l, l_dot]
    where theta is the swing angle (rad), theta_dot its rate (rad/s),
    l the cable length (m), and l_dot the hoisting speed (m/s).

    Action space is Discrete(3): hoist down, do nothing, hoist up, which
    map to cable accelerations of -MAX_CABLE_ACCEL, 0, +MAX_CABLE_ACCEL.

    The episode ends when the cable length leaves [l_min_threshold,
    l_max_threshold] or the swing angle exceeds +/- theta_threshold.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second' : 50
    }

    # actions available: hoist down, do nothing, hoist up (m/s^2)
    MAX_CABLE_ACCEL = 1.0
    AVAIL_CABLE_ACCEL = [-MAX_CABLE_ACCEL, 0, MAX_CABLE_ACCEL]

    def __init__(self):
        self.gravity = 9.8            # accel. due to gravity (m/s^2)
        self.masspend = 1.0           # mass of the pendulum point mass (kg)
        self.max_cable_accel = 0.25   # maximum acceleration of cable (m/s^2)
        self.tau = 0.02               # seconds between state updates

        # Define thresholds for failing the episode
        # NOTE(review): the original comment claimed "+/- 45 degree limit",
        # but 45*pi/360 is 22.5 degrees in radians (45 deg would be
        # 45*pi/180). Value kept as-is to preserve behavior -- confirm
        # which was intended.
        self.theta_threshold = 45 * np.pi / 360  # swing-angle limit (rad)
        self.l_max_threshold = 3.0               # max cable length (m)
        self.l_min_threshold = 0.5               # min cable length (m)

        # A continuous action space between +/- max_cable_accel could be
        # used instead:
        #self.action_space = spaces.Box(-self.max_cable_accel, self.max_cable_accel, shape = (1,))

        # This action space is just hoist down, do nothing, hoist up
        self.action_space = spaces.Discrete(3)

        high_limit = np.array([2*self.theta_threshold,     # max observable angle
                               10*2*self.theta_threshold,  # max observable angular vel.
                               10,                         # max observable length
                               2])                         # max observable cable vel

        low_limit = np.array([-2*self.theta_threshold,     # min observable angle
                              -10*2*self.theta_threshold,  # min observable angular vel.
                              0,                           # min observable length
                              -2])                         # min observable cable vel

        # BUG FIX: spaces.Box takes (low, high); the original passed
        # (high_limit, low_limit), inverting the observation space bounds.
        self.observation_space = spaces.Box(low_limit, high_limit)

        self._seed()
        self.viewer = None
        self.state = None

        self.steps_beyond_done = None

    def _seed(self, seed=None):
        """Seed the environment's random number generator.

        Returns the list of seeds used, per the Gym convention.
        """
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):
        """Advance the simulation one timestep of length self.tau.

        action: index into AVAIL_CABLE_ACCEL (0 = hoist down, 1 = nothing,
                2 = hoist up).

        Returns (observation, reward, done, info). The reward is the
        negative absolute swing angle while running, 0 once done.
        """
        assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action))

        theta, theta_dot, l, l_dot = self.state

        cable_accel = self.AVAIL_CABLE_ACCEL[action]

        # Equation of motion for a pendulum with time-varying length:
        #   theta_ddot = -(2*l_dot/l)*theta_dot - (g/l)*sin(theta)
        # BUG FIX: the original omitted the factor of 2 on the Coriolis
        # term (-l_dot/l * theta_dot).
        theta_ddot = -2 * l_dot/l * theta_dot - self.gravity/l * np.sin(theta)
        l_ddot = cable_accel

        # Explicit (forward) Euler integration of the state
        theta = theta + self.tau * theta_dot
        theta_dot = theta_dot + self.tau * theta_ddot
        l = l + self.tau * l_dot
        l_dot = l_dot + self.tau * l_ddot
        self.state = (theta, theta_dot, l, l_dot)

        # Episode fails when the cable leaves its length limits or the
        # swing angle exceeds the threshold
        done = bool(l > self.l_max_threshold
                    or l < self.l_min_threshold
                    or theta < -self.theta_threshold
                    or theta > self.theta_threshold)

        if not done:
            reward = -np.abs(theta) # a negative award for nonzero angles
        else:
            # if self.steps_beyond_done == 0:
            #     logger.warning("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
            # self.steps_beyond_done += 1
            reward = 0.0

        return np.array(self.state), reward, done, {}

    def _reset(self):
        """Reset to a nominal state (10 deg swing, 2 m cable, at rest)
        plus uniform noise in [-0.1, 0.1) on each component.
        """
        # self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
        self.state = np.array([10*np.pi/180, 0, 2, 0]) + self.np_random.uniform(low=-0.1, high=0.1, size=(4,))
        self.steps_beyond_done = None
        return np.array(self.state)

    def _render(self, mode='human', close=False):
        """Draw the pendulum: a rotating, length-scaled cable polygon
        pinned near the top of the window, with a circular payload.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return

        screen_width = 600
        screen_height = 400

        world_width = 2 * self.l_max_threshold# * np.sin(self.theta_threshold)
        scale = screen_width/world_width
        cable_pin = screen_height - 10  # y-coordinate of the pivot (px)
        payload_size = 10.0
        cable_width = 2.0

        theta, theta_dot, l, l_dot = self.state

        if self.viewer is None:
            self.l_init = l # save the initial length for scaling cable
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(screen_width, screen_height)

            # define the cable as a polygon, so we can change its length later
            # BUG FIX: the original unpacked the corners into (l, r, t, b),
            # clobbering the cable length l and corrupting the payload
            # position computed below on the first rendered frame.
            left, right = -cable_width/2, cable_width/2
            top, bottom = cable_width/2, -l*scale - cable_width/2
            self.cable = rendering.FilledPolygon([(left, bottom), (left, top),
                                                  (right, top), (right, bottom)])
            self.cabletrans = rendering.Transform(translation=(screen_width/2, cable_pin))
            self.cable.add_attr(self.cabletrans)
            self.cable.set_color(0.25,0.25,0.25) # dark gray
            self.viewer.add_geom(self.cable)

            # the payload is a circle.
            self.payload = rendering.make_circle(payload_size)
            self.payloadtrans = rendering.Transform(translation=(screen_width/2, cable_pin-l*scale))
            self.payload.add_attr(self.payloadtrans)
            self.payload.set_color(0.5,0.5,0.5) # medium gray
            self.viewer.add_geom(self.payload)

        if self.state is None:
            return None

        # calculate the payload position in the window, then move it there
        payload_screen_x = screen_width/2 + l*np.sin(theta)*scale
        payload_screen_y = cable_pin - l*np.cos(theta)*scale
        self.payloadtrans.set_translation(payload_screen_x, payload_screen_y)

        # rotate the cable about the pivot
        self.cabletrans.set_rotation(theta)

        # change its length by scaling its length relative to its initial length
        self.cabletrans.set_scale(1, l/self.l_init)

        return self.viewer.render(return_rgb_array = mode=='rgb_array')

0 commit comments

Comments
 (0)
Please sign in to comment.