Updates to mass-spring-damper OpenAI Gym environment

DocVaughan · DocVaughan · commit aff7c79f4793 · 2018-05-08T11:18:45.000-05:00
diff --git a/OpenAI Gym/mass_spring_damper_continuous/mass_spring_damper_continuous.py b/OpenAI Gym/mass_spring_damper_continuous/mass_spring_damper_continuous.py
@@ -30,6 +30,9 @@
 import numpy as np
 import datetime # for unique filenames
 
+# Import the ODE solver
+from scipy.integrate import solve_ivp
+
 logger = logging.getLogger(__name__)
 
 
@@ -57,6 +60,12 @@ def __init__(self):
         # Define thesholds for trial limits, penalized heavily for exceeding these
         self.mass_pos_threshold = 4.0                 # max mass position (m)
         self.mass_vel_threshold = 0.5                 # max mass velocity (m/s)
+        
+        # ODE solver parameters, passed to solve_ivp in _step:
+        # absolute/relative error tolerances and the maximum solver step size
+        self.abserr = 1.0e-9
+        self.relerr = 1.0e-9
+        self.max_step = 0.1
 
         # This action space is the range of acceleration of the trolley
         self.action_space = spaces.Box(low=-self.max_force,
@@ -77,43 +86,82 @@ def __init__(self):
         self.state = None
         self.done = False
         self.force = 0.0
+        
+    def eq_of_motion(self, t, w):
+        """
+        Return d/dt of [x1, x1_dot, x2, x2_dot] for the coupled mass-spring-damper system.
+
+        Arguments:
+            t :  time (not used by these equations; first argument of the solve_ivp callback)
+            w :  vector of the state variables: [x1, x1_dot, x2, x2_dot]
+        """
+    
+        x1 = w[0]
+        x1_dot = w[1]
+        x2 = w[2]
+        x2_dot = w[3]
+
+        # Create sysODE = (x1', x1_dot', x2', x2_dot'); self.force enters the m1 equation only
+        sysODE = np.array([x1_dot,
+                           1 / self.m1 * (self.k * (x2 - x1 - self.spring_equil) + self.c * (x2_dot - x1_dot) + self.force),
+                           x2_dot,
+                           1 / self.m2 * (-self.k * (x2 - x1 - self.spring_equil) + -self.c * (x2_dot - x1_dot))])
+    
+        return sysODE
 
     def _seed(self, seed=None):
         self.np_random, seed = seeding.np_random(seed)
         return [seed]
 
+
     def _step(self, action):
         x1, x1_dot, x2, x2_dot = self.state
         self.counter = self.counter + 1
         
         # Get the action and clip it to the min/max trolley accel
         self.force = np.clip(action[0], -self.max_force, self.max_force)
         
-        
-        # Update m1 states
-        x1_accel = 1 / self.m1 * (self.k * (x2 - x1 - self.spring_equil) + 
-                                  self.c * (x2_dot - x1_dot) + 
-                                  self.force)
-        
-        
-        x1_dot = x1_dot + self.tau * x1_accel
-        
-        # Get the action and clip it to the min/max m1 vel
-        x1_dot = np.clip(x1_dot, -self.mass_vel_threshold, self.mass_vel_threshold)
-        
-        x1  = x1 + self.tau * x1_dot
+        x0 = [x1, x1_dot, x2, x2_dot]
 
-        # Update m2 states
-        x2_accel = 1 / self.m2 * (-self.k * (x2 - x1 - self.spring_equil) + 
-                                  -self.c * (x2_dot - x1_dot))
-        
-        
-        x2_dot = x2_dot + self.tau * x2_accel
+        # Call the ODE solver.
+        solution = solve_ivp(self.eq_of_motion, 
+                             [0, self.tau], 
+                             x0, 
+                             max_step=self.max_step, 
+                             atol=self.abserr, 
+                             rtol=self.relerr)
+
+        resp = solution.y
         
-        # Get the action and clip it to the min/max m2 accel
-        x2_dot = np.clip(x2_dot, -self.mass_vel_threshold, self.mass_vel_threshold)
+        # Superseded fixed-step update, kept for reference only -- update m1 states
+#         x1_accel = 1 / self.m1 * (self.k * (x2 - x1 - self.spring_equil) + 
+#                                   self.c * (x2_dot - x1_dot) + 
+#                                   self.force)
+#         
+#         
+#         x1_dot = x1_dot + self.tau * x1_accel
+#         
+#         # Get the action and clip it to the min/max m1 vel
+#         x1_dot = np.clip(x1_dot, -self.mass_vel_threshold, self.mass_vel_threshold)
+#         
+#         x1  = x1 + self.tau * x1_dot
+# 
+#         # Update m2 states
+#         x2_accel = 1 / self.m2 * (-self.k * (x2 - x1 - self.spring_equil) + 
+#                                   -self.c * (x2_dot - x1_dot))
+#         
+#         
+#         x2_dot = x2_dot + self.tau * x2_accel
+#         
+#         # Get the action and clip it to the min/max m2 accel
+#         x2_dot = np.clip(x2_dot, -self.mass_vel_threshold, self.mass_vel_threshold)
+#        
+#         x2  = x2 + self.tau * x2_dot
         
-        x2  = x2 + self.tau * x2_dot
+        x1 = resp[0, -1]
+        x1_dot = resp[1, -1]
+        x2 = resp[2, -1]
+        x2_dot = resp[3, -1]
 
         self.state = (x1, x1_dot, x2, x2_dot)
         
diff --git a/OpenAI Gym/openAI_massSpringContinuous_episodeDataProcessing.py b/OpenAI Gym/openAI_massSpringContinuous_episodeDataProcessing.py
@@ -25,7 +25,7 @@
 import numpy as np
 import matplotlib.pyplot as plt
 
-FILENAME = 'example_data/EpisodeData_2018-04-08_212624.csv'
+FILENAME = 'example_data/EpisodeData_2018-04-27_174916.csv'
 
 # Files have data saved as:
 # Time (s), Angle (rad), Angle (rad/s), Trolley Pos (m), Trolly Vel (m/s), Trolley Accel (m/s^2), Reward
diff --git a/OpenAI Gym/openAI_massSpringContinuous_randomAction.py b/OpenAI Gym/openAI_massSpringContinuous_randomAction.py
@@ -22,7 +22,7 @@
 ###############################################################################
 
 import numpy as np
-import matplotlib.pyplot as plt
+# import matplotlib.pyplot as plt
 
 import gym
 import time
diff --git a/OpenAI Gym/openAI_massSpringContinuous_test.py b/OpenAI Gym/openAI_massSpringContinuous_test.py
@@ -44,7 +44,7 @@
 
 LAYER_SIZE = 32
 NUM_HIDDEN_LAYERS = 3
-NUM_STEPS = 100000
+NUM_STEPS = 10000
 TRIAL_ID = datetime.datetime.now().strftime('%Y-%m-%d_%H%M%S')
 
 # TODO: Add file picker GUI - For now, look for files with the format below
diff --git a/OpenAI Gym/openAI_massSpringContinuous_train.py b/OpenAI Gym/openAI_massSpringContinuous_train.py
@@ -45,7 +45,7 @@
 
 LAYER_SIZE = 32
 NUM_HIDDEN_LAYERS = 3
-NUM_STEPS = 500000
+NUM_STEPS = 10000
 TRIAL_ID = datetime.datetime.now().strftime('%Y-%m-%d_%H%M%S')
 
 # Get the environment and extract the number of actions.