1
+ #! /usr/bin/env python
2
+
3
+ ###############################################################################
4
+ # mass_spring_damper_continuous.py
5
+ #
6
+ # Defines a openAI Gym environment for use a two mass-spring-damper system.
7
+ # This version has a continuous range of inputs for the m1 force input.
8
+ #
9
+ # NOTE: Any plotting is set up for output, not viewing on screen.
10
+ # So, it will likely be ugly on screen. The saved PDFs should look
11
+ # better.
12
+ #
13
+ # Created: 04/08/18
14
+ # - Joshua Vaughan
15
+
16
+ # - http://www.ucs.louisiana.edu/~jev9637
17
+ #
18
+ # Modified:
19
+ # *
20
+ #
21
+ # TODO:
22
+ # *
23
+ ###############################################################################
24
+
25
+
26
+ import gym
27
+ from gym import spaces
28
+ from gym .utils import seeding
29
+ import logging
30
+ import numpy as np
31
+ import datetime # for unique filenames
32
+
33
+ logger = logging .getLogger (__name__ )
34
+
35
+
36
+ class MassSpringDamperContEnv (gym .Env ):
37
+ metadata = {
38
+ 'render.modes' : ['human' , 'rgb_array' ],
39
+ 'video.frames_per_second' : 50
40
+ }
41
+
42
+
43
+ def __init__ (self ):
44
+ self .m1 = 1.0 # mass of the mass 1 (kg)
45
+ self .m2 = 1.0 # mass of the mass 2(kg)
46
+ self .k = 2 * np .pi ** 2 # spring constant (N/m)
47
+ self .spring_equil = 1.0 # equilibrium length of spring 0.25 m
48
+ self .c = 0.0 # damping coefficient (Ns/m)
49
+
50
+ self .tau = 0.02 # seconds between state updates
51
+ self .counter = 0 # counter for number of steps
52
+ self .desired_position = 0 # desired final position of system
53
+ self .max_force = 25.0 # maximum force allowed (N)
54
+ self .SAVE_DATA = False # set True to save episode data
55
+ self .MAX_STEPS = 500 # maximum number of steps to run
56
+
57
+ # Define thesholds for trial limits, penalized heavily for exceeding these
58
+ self .mass_pos_threshold = 4.0 # max mass position (m)
59
+ self .mass_vel_threshold = 0.5 # max mass velocity (m/s)
60
+
61
+ # This action space is the range of acceleration of the trolley
62
+ self .action_space = spaces .Box (low = - self .max_force ,
63
+ high = self .max_force ,
64
+ shape = (1 ,))
65
+
66
+ high_limit = np .array ([self .mass_pos_threshold , # max observable angle
67
+ self .mass_vel_threshold , # max observable angular vel.
68
+ self .mass_pos_threshold , # max observable position
69
+ self .mass_vel_threshold ]) # max observable cable vel
70
+
71
+ low_limit = - high_limit # limits are symmetric about 0
72
+
73
+ self .observation_space = spaces .Box (high_limit , low_limit )
74
+
75
+ self ._seed ()
76
+ self .viewer = None
77
+ self .state = None
78
+ self .done = False
79
+ self .force = 0.0
80
+
81
+ def _seed (self , seed = None ):
82
+ self .np_random , seed = seeding .np_random (seed )
83
+ return [seed ]
84
+
85
+ def _step (self , action ):
86
+ x1 , x1_dot , x2 , x2_dot = self .state
87
+ self .counter = self .counter + 1
88
+
89
+ # Get the action and clip it to the min/max trolley accel
90
+ self .force = np .clip (action [0 ], - self .max_force , self .max_force )
91
+
92
+
93
+ # Update m1 states
94
+ x1_accel = 1 / self .m1 * (self .k * (x2 - x1 - self .spring_equil ) +
95
+ self .c * (x2_dot - x1_dot ) +
96
+ self .force )
97
+
98
+
99
+ x1_dot = x1_dot + self .tau * x1_accel
100
+
101
+ # Get the action and clip it to the min/max m1 vel
102
+ x1_dot = np .clip (x1_dot , - self .mass_vel_threshold , self .mass_vel_threshold )
103
+
104
+ x1 = x1 + self .tau * x1_dot
105
+
106
+ # Update m2 states
107
+ x2_accel = 1 / self .m2 * (- self .k * (x2 - x1 - self .spring_equil ) +
108
+ - self .c * (x2_dot - x1_dot ))
109
+
110
+
111
+ x2_dot = x2_dot + self .tau * x2_accel
112
+
113
+ # Get the action and clip it to the min/max m2 accel
114
+ x2_dot = np .clip (x2_dot , - self .mass_vel_threshold , self .mass_vel_threshold )
115
+
116
+ x2 = x2 + self .tau * x2_dot
117
+
118
+ self .state = (x1 , x1_dot , x2 , x2_dot )
119
+
120
+ # Define a boolean on whether we're exceeding limits or not. We'll just penalize
121
+ # any of these conditions identically in the reward function
122
+ limits = x1 > self .mass_pos_threshold \
123
+ or x1 < - self .mass_pos_threshold \
124
+ or x1_dot > self .mass_vel_threshold \
125
+ or x1_dot < - self .mass_vel_threshold \
126
+ or x2 > self .mass_pos_threshold \
127
+ or x2 < - self .mass_pos_threshold \
128
+ or x2_dot > self .mass_vel_threshold \
129
+ or x2_dot < - self .mass_vel_threshold \
130
+
131
+
132
+ # TODO: 04/08/18 - This has *huge* effect on the outcome. Decide "optimal" reward scheme.
133
+ # Use COM position
134
+ #distance_to_target = self.desired_position - (self.m1 * x1 + self.m2 * x2) / (self.m1 + self.m2)
135
+
136
+ # Use m2 position
137
+ distance_to_target = self .desired_position - x2
138
+
139
+ # clip the reward to +/-10
140
+ reward = np .clip (- 10 * distance_to_target ** 2 - self .force ** 2 , - 10 , 1 )
141
+
142
+ if self .SAVE_DATA :
143
+ current_data = np .array ([self .counter * self .tau , x1 , x1_dot , x2 , x2_dot , self .force , reward ])
144
+ self .episode_data [self .counter , :] = current_data
145
+
146
+ if self .counter >= self .MAX_STEPS :
147
+ self .done = True
148
+
149
+ if self .done == True and self .SAVE_DATA :
150
+ header = header = 'Time (s), x1 (m), x1_dot (m/s), x2 (m), x2_dot (m/s), Force (N), Reward'
151
+ data_filename = 'example_data/EpisodeData_{}.csv' .format (datetime .datetime .now ().strftime ('%Y-%m-%d_%H%M%S' ))
152
+ np .savetxt (data_filename , self .episode_data , header = header , delimiter = ',' )
153
+
154
+ return np .array (self .state ), reward , self .done , {}
155
+
156
+ def _reset (self ):
157
+ # TODO: 07/07/17 - Probably need more randomness in initial conditions
158
+ rand_pos = self .np_random .uniform (low = - 3.0 , high = 3.0 )
159
+
160
+ self .state = np .array ([rand_pos ,
161
+ 0 ,
162
+ rand_pos + self .spring_equil ,
163
+ 0 ])
164
+
165
+ # Reset the counter and the data recorder array
166
+ self .counter = 0
167
+
168
+ # Reset the done flag
169
+ self .done = False
170
+
171
+ if self .SAVE_DATA :
172
+ self .episode_data = np .zeros ((self .MAX_STEPS + 1 , 7 ))
173
+ self .episode_data [0 ,:] = np .array ([0 , self .state [0 ], self .state [1 ], self .state [2 ], self .state [3 ], 0 , 0 ])
174
+
175
+ return np .array (self .state )
176
+
177
+ def _render (self , mode = 'human' , close = False ):
178
+ if close :
179
+ if self .viewer is not None :
180
+ self .viewer .close ()
181
+ self .viewer = None
182
+ return
183
+
184
+ screen_width = 600
185
+ screen_height = 400
186
+
187
+ world_width = 1.5 * self .mass_pos_threshold
188
+ scale = screen_width / world_width # Scale according to width
189
+
190
+
191
+ # Define the mass size and its offset from the bottom of the screen (pixels)
192
+ mass_width = 30.0
193
+ mass_height = 30.0
194
+ mass_yOffset = screen_height / 2
195
+
196
+ target_size = 10
197
+ COM_size = 5
198
+
199
+ x1 , x1_dot , x2 , x2_dot = self .state
200
+
201
+ COM_position = (self .m1 * x1 + self .m2 * (x2 )) / (self .m1 + self .m2 )
202
+
203
+ if self .viewer is None : # Initial scene setup
204
+ from gym .envs .classic_control import rendering
205
+ self .viewer = rendering .Viewer (screen_width , screen_height )
206
+
207
+ # the target is a series of circles, a bullseye
208
+ self .target = rendering .make_circle (target_size * 2 )
209
+ self .targettrans = rendering .Transform (translation = (screen_width / 2 + self .desired_position * scale , mass_yOffset ))
210
+ self .target .add_attr (self .targettrans )
211
+ self .target .set_color (1 ,0 ,0 ) # red
212
+ self .viewer .add_geom (self .target )
213
+
214
+ self .target = rendering .make_circle (target_size * 1.25 )
215
+ self .targettrans = rendering .Transform (translation = (screen_width / 2 + self .desired_position * scale , mass_yOffset ))
216
+ self .target .add_attr (self .targettrans )
217
+ self .target .set_color (1 ,1 ,1 ) # white
218
+ self .viewer .add_geom (self .target )
219
+
220
+ self .target = rendering .make_circle (target_size / 2 )
221
+ self .targettrans = rendering .Transform (translation = (screen_width / 2 + self .desired_position * scale , mass_yOffset ))
222
+ self .target .add_attr (self .targettrans )
223
+ self .target .set_color (1 ,0 ,0 ) # red
224
+ self .viewer .add_geom (self .target )
225
+
226
+
227
+ # the COM is a series of circles, a bullseye
228
+ self .COM1 = rendering .make_circle (COM_size * 2 )
229
+ self .COM1trans = rendering .Transform (translation = (screen_width / 2 + COM_position * scale , mass_yOffset ))
230
+ self .COM1 .add_attr (self .COM1trans )
231
+ self .COM1 .set_color (0 ,0 ,0 ) # black
232
+ self .viewer .add_geom (self .COM1 )
233
+
234
+ self .COM2 = rendering .make_circle (COM_size * 1.25 )
235
+ self .COM2trans = rendering .Transform (translation = (screen_width / 2 + COM_position * scale , mass_yOffset ))
236
+ self .COM2 .add_attr (self .COM2trans )
237
+ self .COM2 .set_color (1 ,1 ,1 ) # white
238
+ self .viewer .add_geom (self .COM2 )
239
+
240
+ self .COM3 = rendering .make_circle (COM_size / 2 )
241
+ self .COM3trans = rendering .Transform (translation = (screen_width / 2 + COM_position * scale , mass_yOffset ))
242
+ self .COM3 .add_attr (self .COM3trans )
243
+ self .COM3 .set_color (0 ,0 ,0 ) # black
244
+ self .viewer .add_geom (self .COM3 )
245
+
246
+
247
+ # Define the trolley polygon
248
+ l ,r ,t ,b = - mass_width / 2 , mass_width / 2 , mass_height / 2 , - mass_height / 2
249
+
250
+ self .mass1 = rendering .FilledPolygon ([(l ,b ), (l ,t ), (r ,t ), (r ,b )])
251
+ self .mass1trans = rendering .Transform (translation = (screen_width / 2 + x1 * scale , mass_yOffset ))
252
+ self .mass1 .add_attr (self .mass1trans )
253
+ self .mass1 .set_color (0.85 ,0.85 ,0.85 ) # light gray
254
+ self .viewer .add_geom (self .mass1 )
255
+
256
+ self .mass2 = rendering .FilledPolygon ([(l ,b ), (l ,t ), (r ,t ), (r ,b )])
257
+ self .mass2trans = rendering .Transform (translation = (screen_width / 2 + x2 * scale , mass_yOffset ))
258
+ self .mass2 .add_attr (self .mass2trans )
259
+ self .mass2 .set_color (0.85 ,0.85 ,0.85 ) # light gray
260
+ self .viewer .add_geom (self .mass2 )
261
+
262
+
263
+ # This is a bar that shows the direction of the current accel. command
264
+ l ,r ,t ,b = - 10.0 , 10.0 , 4 , - 4
265
+ self .accel = rendering .FilledPolygon ([(l ,b ), (l ,t ), (r ,t ), (r ,b )])
266
+ self .acceltrans = rendering .Transform (translation = (screen_width / 2 + x1 * scale - mass_width / 2 , mass_yOffset ))
267
+ self .accel .add_attr (self .acceltrans )
268
+ self .accel .set_color (0.1 , 0.1 , 0.5 )
269
+ self .viewer .add_geom (self .accel )
270
+
271
+
272
+ # move the trolley
273
+ self .mass1trans .set_translation (screen_width / 2 + x1 * scale , mass_yOffset )
274
+ self .mass2trans .set_translation (screen_width / 2 + x2 * scale , mass_yOffset )
275
+ self .COM1trans .set_translation (screen_width / 2 + COM_position * scale , mass_yOffset )
276
+ self .COM2trans .set_translation (screen_width / 2 + COM_position * scale , mass_yOffset )
277
+ self .COM3trans .set_translation (screen_width / 2 + COM_position * scale , mass_yOffset )
278
+
279
+ # show the accel direction
280
+ accel_scaling = 1 / (25 * self .max_force ) * self .force * scale
281
+ self .acceltrans .set_translation (screen_width / 2 + (x1 * scale + (accel_scaling / 2 )), mass_yOffset )
282
+ self .acceltrans .set_scale (accel_scaling , 1 )
283
+
284
+ return self .viewer .render (return_rgb_array = mode == 'rgb_array' )
0 commit comments