-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathopenAI_massSpringContinuous_randomAction.py
executable file
·65 lines (54 loc) · 2 KB
/
openAI_massSpringContinuous_randomAction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#! /usr/bin/env python
###############################################################################
# openAI_massSpringContinuous_randomAction.py
#
# Script to run the continuous mass-spring-damper OpenAI Gym environment,
# taking a random action at every timestep
#
# NOTE: Any plotting is set up for output, not viewing on screen.
# So, it will likely be ugly on screen. The saved PDFs should look
# better.
#
# Created: 07/07/17
# - Joshua Vaughan
# - http://www.ucs.louisiana.edu/~jev9637
#
# Modified:
# *
#
# TODO:
# *
###############################################################################
import numpy as np
# import matplotlib.pyplot as plt
import gym
import time
import mass_spring_damper_continuous
# Build the environment. The id below is presumably registered with gym as a
# side effect of the `mass_spring_damper_continuous` import at the top of the
# file -- confirm against that package.
env = gym.make('mass_spring_damper_continuous-v0')

# Masses of the two bodies. In this script they only feed the center-of-mass
# calculation below.
# NOTE(review): presumably these should match the masses used inside the
# environment -- confirm.
m1 = 1
m2 = 1

# The try/finally guarantees the environment (and any render window it owns)
# is released even if the run is interrupted with Ctrl-C or a step raises.
try:
    # Run 5 episodes of up to 1000 timesteps, taking random actions at each step
    for i_episode in range(5):
        observation = env.reset()

        for t in range(1000):
            env.render()

            # Just randomly choose an action
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)

            # The observation is unpacked as four values:
            # position and velocity of each of the two masses.
            x1, x1_dot, x2, x2_dot = observation

            # Mass-weighted mean position. Normalizing by the total mass
            # (rather than a hard-coded 2) keeps this correct if m1 or m2
            # is changed. Currently computed but not displayed.
            COM_position = (m1 * x1 + m2 * x2) / (m1 + m2)

            # Finally, print the updated state of the system
            print("\033[2J\033[;H")  # Clear the terminal each time
            j = 40   # padding
            d = '.'  # what to fill with
            print("x1 (m).:".ljust(j, d), '{:+8.3f}'.format(x1))
            print("x1_dot (m/s):".ljust(j, d), '{:+8.3f}'.format(x1_dot))
            print("x2 (m):".ljust(j, d), '{:+8.3f}'.format(x2))
            print("x2_dot (m/s):".ljust(j, d), '{:+8.3f}'.format(x2_dot))
            print("Force Input (N):".ljust(j, d), '{:+8.3f}'.format(action[0]))
            print("Reward:".ljust(j, d), '{:+8.3f}'.format(reward))

            # If the episode finishes before the full time range, notify,
            # pause briefly so the message can be read, then start the
            # next episode.
            if done:
                print("\r\nEpisode finished after {} timesteps".format(t+1))
                time.sleep(1)
                break
finally:
    env.close()