31
31
import datetime # for unique filenames
32
32
33
33
# Import the ODE solver
34
- from scipy .integrate import odeint
34
+ from scipy .integrate import solve_ivp
35
35
36
36
logger = logging .getLogger (__name__ )
37
37
@@ -87,7 +87,7 @@ def __init__(self):
87
87
self .done = False
88
88
self .force = 0.0
89
89
90
- def eq_of_motion (self , w , t ):
90
+ def eq_of_motion (self , t , w ):
91
91
"""
92
92
Defines the differential equations for the coupled spring-mass system.
93
93
@@ -125,15 +125,23 @@ def _step(self, action):
125
125
x0 = [x1 , x1_dot , x2 , x2_dot ]
126
126
127
127
# Call the ODE solver.
128
- resp = odeint (self .eq_of_motion ,
129
- x0 ,
130
- [ 0 , self . tau ] ,
131
- hmax = self .max_step ,
132
- atol = self .abserr ,
133
- rtol = self .relerr )
128
+ solution = solve_ivp (self .eq_of_motion ,
129
+ [ 0 , self . tau ] ,
130
+ x0 ,
131
+ max_step = self .max_step ,
132
+ atol = self .abserr ,
133
+ rtol = self .relerr )
134
134
135
- print ( resp )
135
+ resp = solution . y
136
136
137
+ x1 = resp [0 , - 1 ]
138
+ x1_dot = resp [1 , - 1 ]
139
+ x2 = resp [2 , - 1 ]
140
+ x2_dot = resp [3 , - 1 ]
141
+
142
+ # TODO: 05/08/18 - JEV -
143
+ # Remove after testing. Using a proper ODE solver as above is the more
144
+ # scalable approach
137
145
# Update m1 states
138
146
# x1_accel = 1 / self.m1 * (self.k * (x2 - x1 - self.spring_equil) +
139
147
# self.c * (x2_dot - x1_dot) +
@@ -158,11 +166,8 @@ def _step(self, action):
158
166
# x2_dot = np.clip(x2_dot, -self.mass_vel_threshold, self.mass_vel_threshold)
159
167
#
160
168
# x2 = x2 + self.tau * x2_dot
161
-
162
- x1 = resp [- 1 , 0 ]
163
- x1_dot = resp [- 1 , 0 ]
164
- x2 = resp [- 1 , 2 ]
165
- x2_dot = resp [- 1 , 3 ]
169
+ # End of block to remove after testing
170
+
166
171
167
172
self .state = (x1 , x1_dot , x2 , x2_dot )
168
173
@@ -186,14 +191,18 @@ def _step(self, action):
186
191
distance_to_target = self .desired_position - x2
187
192
188
193
# clip the reward to +/-10
189
- reward = np .clip (- 10 * distance_to_target ** 2 - self .force ** 2 , - 10 , 1 )
194
+ reward = np .clip (- 10 * distance_to_target ** 2 - 0.01 * self .force ** 2 , - 1 , 1 )
190
195
191
196
if self .SAVE_DATA :
192
197
current_data = np .array ([self .counter * self .tau , x1 , x1_dot , x2 , x2_dot , self .force , reward ])
193
198
self .episode_data [self .counter , :] = current_data
194
199
195
200
if self .counter >= self .MAX_STEPS :
196
201
self .done = True
202
+
203
+ if limits :
204
+ reward = - 1000
205
+ self .done = True
197
206
198
207
if self .done == True and self .SAVE_DATA :
199
208
header = header = 'Time (s), x1 (m), x1_dot (m/s), x2 (m), x2_dot (m/s), Force (N), Reward'
0 commit comments