1
+ #! /usr/bin/env python
2
+
3
+ ###############################################################################
4
+ # variable_pendulum.py
5
+ #
6
+ # Defines a variable-length pendulum environment for use with OpenAI Gym.
7
+ #
8
+ # NOTE: Any plotting is set up for output, not viewing on screen.
9
+ # So, it will likely be ugly on screen. The saved PDFs should look
10
+ # better.
11
+ #
12
+ # Created: 07/07/17
13
+ # - Joshua Vaughan
14
+ # - joshua.vaughan@louisiana.edu
15
+ # - http://www.ucs.louisiana.edu/~jev9637
16
+ #
17
+ # Modified:
18
+ # *
19
+ #
20
+ # TODO:
21
+ # *
22
+ ###############################################################################
23
+
24
+
25
+
26
+ import gym
27
+ from gym import spaces
28
+ from gym .utils import seeding
29
+ import logging
30
+ import numpy as np
31
+
32
# Module-level logger, named after this module via __name__.
+ logger = logging .getLogger (__name__ )
33
+
34
+
35
class VariablePendulumEnv(gym.Env):
    """Planar pendulum whose cable length is changed by a discrete hoist action.

    The state is the 4-tuple ``(theta, theta_dot, l, l_dot)``: swing angle
    (rad), angular velocity (rad/s), cable length (m) and cable velocity
    (m/s).  The action is an index into ``AVAIL_CABLE_ACCEL`` selecting the
    cable-length acceleration applied during one time step of ``tau`` seconds.

    An episode is reported as done when the cable length leaves
    ``[l_min_threshold, l_max_threshold]`` or the angle magnitude exceeds
    ``theta_threshold``.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 50
    }

    # Cable-length accelerations (m/s^2) selected by actions 0, 1 and 2.
    # The original notes label them "hoist down, do nothing, hoist up".
    MAX_CABLE_ACCEL = 1.0
    AVAIL_CABLE_ACCEL = [-MAX_CABLE_ACCEL, 0, MAX_CABLE_ACCEL]

    def __init__(self):
        """Set the physical parameters, limits and Gym spaces."""
        self.gravity = 9.8           # accel. due to gravity (m/s^2)
        self.masspend = 1.0          # mass of the pendulum point mass (kg)
        # NOTE(review): this value is only referenced by the disabled
        # continuous action space below; the discrete actions use the class
        # constant MAX_CABLE_ACCEL (1.0).  Confirm which value is intended.
        self.max_cable_accel = 0.25  # maximum acceleration of cable (m/s^2)
        self.tau = 0.02              # seconds between state updates

        # Define thresholds for failing the episode
        # 45 degrees expressed in radians (the previous "/ 360" gave 22.5 deg)
        self.theta_threshold = 45 * np.pi / 180  # +/- 45 degree limit (rad)
        self.l_max_threshold = 3.0               # max cable length (m)
        self.l_min_threshold = 0.5               # min cable length (m)

        # A continuous alternative would be:
        # spaces.Box(-self.max_cable_accel, self.max_cable_accel, shape=(1,))

        # This action space is just hoist down, do nothing, hoist up
        self.action_space = spaces.Discrete(len(self.AVAIL_CABLE_ACCEL))

        high_limit = np.array([
            2 * self.theta_threshold,       # max observable angle
            10 * 2 * self.theta_threshold,  # max observable angular vel.
            10,                             # max observable length
            2,                              # max observable cable vel.
        ])

        low_limit = np.array([
            -2 * self.theta_threshold,       # min observable angle
            -10 * 2 * self.theta_threshold,  # min observable angular vel.
            0,                               # min observable length
            -2,                              # min observable cable vel.
        ])

        # Box takes (low, high); the bounds were previously passed swapped.
        self.observation_space = spaces.Box(low_limit, high_limit)

        self._seed()
        self.viewer = None
        self.state = None

        # None until the first step that reports done; afterwards counts the
        # steps taken past that point (see _step).
        self.steps_beyond_done = None

    def _seed(self, seed=None):
        """Create the environment's random generator and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):
        """Advance the simulation by one explicit-Euler step of ``tau`` seconds.

        Parameters
        ----------
        action : index into ``AVAIL_CABLE_ACCEL``; must be contained in
            ``self.action_space``.

        Returns
        -------
        (observation, reward, done, info) where observation is the new state
        as an array, reward is ``-abs(theta)`` while the episode is running
        and ``0.0`` once it is done, and info is an empty dict.
        """
        assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))

        theta, theta_dot, l, l_dot = self.state

        cable_accel = self.AVAIL_CABLE_ACCEL[action]

        # Point mass on a variable-length cable:
        #   d/dt(m l^2 theta_dot) = -m g l sin(theta)
        #   => theta_ddot = -2 (l_dot / l) theta_dot - (g / l) sin(theta)
        theta_ddot = (-2 * l_dot / l * theta_dot
                      - self.gravity / l * np.sin(theta))
        l_ddot = cable_accel

        # Explicit Euler: positions advance with the old velocities,
        # velocities with the accelerations computed from the old state.
        theta = theta + self.tau * theta_dot
        theta_dot = theta_dot + self.tau * theta_ddot

        l = l + self.tau * l_dot
        l_dot = l_dot + self.tau * l_ddot
        self.state = (theta, theta_dot, l, l_dot)

        done = bool(
            l > self.l_max_threshold
            or l < self.l_min_threshold
            or theta < -self.theta_threshold
            or theta > self.theta_threshold
        )

        if not done:
            reward = -np.abs(theta)  # a negative reward for nonzero angles
        else:
            if self.steps_beyond_done is None:
                # First step that reports done.
                self.steps_beyond_done = 0
            else:
                if self.steps_beyond_done == 0:
                    # Warn once when the caller keeps stepping after done.
                    logger.warning("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
                self.steps_beyond_done += 1
            reward = 0.0

        return np.array(self.state), reward, done, {}

    def _reset(self):
        """Start a new episode and return the initial observation.

        The nominal start is a 10 degree angle on a 2 m cable at rest; every
        state component is perturbed by uniform noise in [-0.1, 0.1).
        """
        nominal_state = np.array([10 * np.pi / 180, 0, 2, 0])
        noise = self.np_random.uniform(low=-0.1, high=0.1, size=(4,))
        self.state = nominal_state + noise
        self.steps_beyond_done = None
        return np.array(self.state)

    def _render(self, mode='human', close=False):
        """Draw the cable and payload, or close the viewer when ``close`` is set.

        Returns ``None`` when closing or when there is no state to draw,
        otherwise whatever ``viewer.render`` returns (an RGB array is
        requested when ``mode == 'rgb_array'``).
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return

        # Nothing to draw before the first reset; this check must precede
        # the state unpacking below.
        if self.state is None:
            return None

        screen_width = 600
        screen_height = 400

        world_width = 2 * self.l_max_threshold
        scale = screen_width / world_width  # pixels per meter
        cable_pin = screen_height - 10      # y-coordinate of the cable pivot
        payload_size = 10.0
        cable_width = 2.0

        theta, _, cable_len, _ = self.state

        if self.viewer is None:
            self.l_init = cable_len  # save the initial length for scaling cable
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(screen_width, screen_height)

            # Define the cable as a polygon, so we can change its length later.
            # The edge names are kept distinct from the cable length, which is
            # still needed after this block.
            left = -cable_width / 2
            right = cable_width / 2
            top = cable_width / 2
            bottom = -cable_len * scale - cable_width / 2
            self.cable = rendering.FilledPolygon(
                [(left, bottom), (left, top), (right, top), (right, bottom)])
            self.cabletrans = rendering.Transform(
                translation=(screen_width / 2, cable_pin))
            self.cable.add_attr(self.cabletrans)
            self.cable.set_color(0.25, 0.25, 0.25)  # dark gray
            self.viewer.add_geom(self.cable)

            # The payload is a circle.
            self.payload = rendering.make_circle(payload_size)
            self.payloadtrans = rendering.Transform(
                translation=(screen_width / 2, cable_pin - cable_len * scale))
            self.payload.add_attr(self.payloadtrans)
            self.payload.set_color(0.5, 0.5, 0.5)  # mid gray
            self.viewer.add_geom(self.payload)

        # Calculate the payload position in the window, then move it there
        payload_screen_x = screen_width / 2 + cable_len * np.sin(theta) * scale
        payload_screen_y = cable_pin - cable_len * np.cos(theta) * scale
        self.payloadtrans.set_translation(payload_screen_x, payload_screen_y)

        # Rotate the cable
        self.cabletrans.set_rotation(theta)

        # Change its length by scaling relative to its initial length
        self.cabletrans.set_scale(1, cable_len / self.l_init)

        return self.viewer.render(return_rgb_array=mode == 'rgb_array')
0 commit comments