-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathopenAI_planarCraneContinuous_randomAction.py
executable file
·58 lines (50 loc) · 1.89 KB
/
openAI_planarCraneContinuous_randomAction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#! /usr/bin/env python
###############################################################################
# openAI_planarCraneContinuous_randomAction.py
#
# script to run the planar crane (continuous action) OpenAI environment,
# taking a random action at each timestep
#
# NOTE: Any plotting is set up for output, not viewing on screen.
# So, it will likely be ugly on screen. The saved PDFs should look
# better.
#
# Created: 07/07/17
# - Joshua Vaughan
# - http://www.ucs.louisiana.edu/~jev9637
#
# Modified:
# *
#
# TODO:
# *
###############################################################################
import numpy as np
import matplotlib.pyplot as plt
import gym
import time
import planar_crane_continuous
def format_state_report(observation, action, reward, width=40, fill='.'):
    """Build the per-timestep status lines that are printed to the terminal.

    Args:
        observation: Indexable with at least four entries, read as
            [theta, theta_dot, x, x_dot]. The first two are scaled by
            180/pi for display (i.e. treated as radians and shown in degrees).
        action: Indexable whose first entry is the commanded trolley
            acceleration, x_ddot.
        reward: Scalar reward returned by the environment for this step.
        width: Column width the labels are padded out to.
        fill: Character used to pad the labels.

    Returns:
        A list of six strings, one per displayed quantity, each made of the
        padded label, a single space, and the value formatted as '+8.3f'.
    """
    rows = (
        ("Theta (deg).:", observation[0] * 180 / np.pi),
        ("Theta_dot (deg/s):", observation[1] * 180 / np.pi),
        ("x (m):", observation[2]),
        ("x_dot (m/s):", observation[3]),
        ("x_ddot (m/s^2) Input:", action[0]),
        ("Reward:", reward),
    )
    return [
        '{} {:+8.3f}'.format(label.ljust(width, fill), value)
        for label, value in rows
    ]


def run_random_episodes(env, num_episodes=5, max_steps=1000):
    """Run episodes on *env*, taking a uniformly sampled action every step.

    Each timestep renders the environment, samples an action from
    ``env.action_space``, steps the environment, and reprints the state
    report. An episode ends early when the environment reports ``done``.

    Args:
        env: A gym environment exposing reset/render/step and action_space.
        num_episodes: Number of episodes to run.
        max_steps: Maximum number of timesteps per episode.
    """
    for _ in range(num_episodes):
        env.reset()

        for t in range(max_steps):
            env.render()

            # just randomly choose an action
            action = env.action_space.sample()
            observation, reward, done, _info = env.step(action)

            # Finally, print the updated state of the system
            print("\033[2J\033[;H")  # Clear the terminal each time
            for line in format_state_report(observation, action, reward):
                print(line)

            # if episode finishes before full time range, notify
            if done:
                print("\r\nEpisode finished after {} timesteps".format(t+1))
                time.sleep(1)  # leave the message on screen before the next clear
                break


def main():
    """Create the planar crane environment and run 5 random-action episodes."""
    env = gym.make('planar_crane_continuous-v0')
    try:
        # run 5 episodes of 1000 timesteps, taking random actions at each step
        run_random_episodes(env, num_episodes=5, max_steps=1000)
    finally:
        # Always release the render window/resources, even on Ctrl-C or error
        env.close()


if __name__ == "__main__":
    main()