Updates to OpenAI mass-spring-damper system and training

DocVaughan · DocVaughan · commit 5debfd005b55 · 2018-05-10T13:31:03.000-07:00
diff --git a/OpenAI Gym/mass_spring_damper_continuous/mass_spring_damper_continuous.py b/OpenAI Gym/mass_spring_damper_continuous/mass_spring_damper_continuous.py
@@ -31,7 +31,7 @@
 import datetime # for unique filenames
 
 # Import the ODE solver
-from scipy.integrate import odeint
+from scipy.integrate import solve_ivp
 
 logger = logging.getLogger(__name__)
 
@@ -87,7 +87,7 @@ def __init__(self):
         self.done = False
         self.force = 0.0
         
-    def eq_of_motion(self, w, t):
+    def eq_of_motion(self, t, w):
         """
         Defines the differential equations for the coupled spring-mass system.
 
@@ -125,15 +125,23 @@ def _step(self, action):
         x0 = [x1, x1_dot, x2, x2_dot]
 
         # Call the ODE solver.
-        resp = odeint(self.eq_of_motion, 
-                      x0, 
-                      [0, self.tau], 
-                      hmax=self.max_step, 
-                      atol=self.abserr, 
-                      rtol=self.relerr)
+        solution = solve_ivp(self.eq_of_motion,
+                             [0, self.tau], 
+                              x0, 
+                              max_step=self.max_step, 
+                              atol=self.abserr, 
+                              rtol=self.relerr)
         
-        print(resp)
+        resp = solution.y
         
+        x1 = resp[0, -1]
+        x1_dot = resp[1, -1]
+        x2 = resp[2, -1]
+        x2_dot = resp[3, -1]
+        
+        # TODO: 05/08/18 - JEV -
+        # Remove the commented-out block below after testing. Using a proper
+        # ODE solver, as above, is the more scalable approach.
         # Update m1 states
 #         x1_accel = 1 / self.m1 * (self.k * (x2 - x1 - self.spring_equil) + 
 #                                   self.c * (x2_dot - x1_dot) + 
@@ -158,11 +166,8 @@ def _step(self, action):
 #         x2_dot = np.clip(x2_dot, -self.mass_vel_threshold, self.mass_vel_threshold)
 #        
 #         x2  = x2 + self.tau * x2_dot
-        
-        x1 = resp[-1, 0]
-        x1_dot = resp[-1, 0]
-        x2 = resp[-1, 2]
-        x2_dot = resp[-1, 3]
+#       End of block to remove after testing
+
 
         self.state = (x1, x1_dot, x2, x2_dot)
         
@@ -186,14 +191,18 @@ def _step(self, action):
         distance_to_target = self.desired_position - x2
         
         # clip the reward to the range [-1, 1]
-        reward = np.clip(-10*distance_to_target**2 - self.force**2, -10, 1)
+        reward = np.clip(-10*distance_to_target**2 - 0.01*self.force**2, -1, 1)
         
         if self.SAVE_DATA:
             current_data = np.array([self.counter * self.tau, x1, x1_dot, x2, x2_dot, self.force, reward])
             self.episode_data[self.counter, :] = current_data
 
         if self.counter >= self.MAX_STEPS:
             self.done = True
+            
+        if limits:
+            reward = -1000
+            self.done = True
         
         if self.done == True and self.SAVE_DATA:
             header = header='Time (s), x1 (m), x1_dot (m/s), x2 (m), x2_dot (m/s), Force (N), Reward'
diff --git a/OpenAI Gym/openAI_massSpringContinuous_train.py b/OpenAI Gym/openAI_massSpringContinuous_train.py
@@ -43,9 +43,9 @@
 
 ENV_NAME = 'mass_spring_damper_continuous-v0'
 
-LAYER_SIZE = 32
+LAYER_SIZE = 512
 NUM_HIDDEN_LAYERS = 3
-NUM_STEPS = 10000
+NUM_STEPS = 1000000
 TRIAL_ID = datetime.datetime.now().strftime('%Y-%m-%d_%H%M%S')
 
 # Get the environment and extract the number of actions.
@@ -65,7 +65,7 @@
                                                                  NUM_STEPS,
                                                                  TRIAL_ID)
 
-# env = gym.wrappers.Monitor(env, MONITOR_FILENAME, video_callable=False, force=True)
+env = gym.wrappers.Monitor(env, MONITOR_FILENAME, video_callable=False, force=True)
 
 
 
@@ -148,4 +148,4 @@
 agent.save_weights(filename, overwrite=True)
 
 # Finally, evaluate our algorithm for 5 episodes.
-agent.test(env, visualize=True) #nb_max_episode_steps=500,
+# agent.test(env, visualize=True) #nb_max_episode_steps=500,