Lab41 · alnaeini · Aug 16, 2019 · Aug 17, 2019 · Aug 17, 2019 · Aug 17, 2019
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,35 @@
+absl-py==0.7.1
+animalai==1.0.3
+animalai-train==1.0.3
+astor==0.8.0
+atari-py==0.2.6
+cloudpickle==1.2.1
+cycler==0.10.0
+dopamine-rl==2.0.5
+future==0.17.1
+gast==0.2.2
+gin-config==0.2.0
+grpcio==1.11.1
+gym==0.13.1
+h5py==2.9.0
+jsonpickle==1.2
+Keras-Applications==1.0.8
+Keras-Preprocessing==1.1.0
+kiwisolver==1.1.0
+Markdown==3.1.1
+matplotlib==3.1.1
+numpy==1.14.5
+opencv-python==4.1.0.25
+Pillow==5.4.1
+protobuf==3.6.1
+pyglet==1.3.2
+pyparsing==2.4.0
+python-dateutil==2.8.0
+PyYAML==5.1.1
+scipy==1.3.0
+six==1.12.0
+tensorboard==1.12.2
+tensorflow==1.12.2
+termcolor==1.1.0
+torch==1.2.0
+Werkzeug==0.15.5
diff --git a/trainers/a3c_src/env.py b/trainers/a3c_src/env.py
@@ -0,0 +1,108 @@
+"""
+@author: Viet Nguyen <[email protected]>
+"""
+
+import gym_super_mario_bros
+from gym.spaces import Box
+from gym import Wrapper
+from nes_py.wrappers import BinarySpaceToDiscreteSpaceEnv
+from gym_super_mario_bros.actions import SIMPLE_MOVEMENT, COMPLEX_MOVEMENT, RIGHT_ONLY
+import cv2
+import numpy as np
+import subprocess as sp
+
+
+class Monitor:
+    def __init__(self, width, height, saved_path):
+
+        self.command = ["ffmpeg", "-y", "-f", "rawvideo", "-vcodec", "rawvideo", "-s", "{}X{}".format(width, height),
+                        "-pix_fmt", "rgb24", "-r", "80", "-i", "-", "-an", "-vcodec", "mpeg4", saved_path]
+        try:
+            self.pipe = sp.Popen(self.command, stdin=sp.PIPE, stderr=sp.PIPE)
+        except FileNotFoundError:
+            pass
+
+    def record(self, image_array):
+        self.pipe.stdin.write(image_array.tostring())
+
+
+def process_frame(frame):
+    if frame is not None:
+        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
+        frame = cv2.resize(frame, (84, 84))[None, :, :] / 255.
+        return frame
+    else:
+        return np.zeros((1, 84, 84))
+
+
+class CustomReward(Wrapper):
+    def __init__(self, env=None, monitor=None):
+        super(CustomReward, self).__init__(env)
+        self.observation_space = Box(low=0, high=255, shape=(1, 84, 84))
+        self.curr_score = 0
+        if monitor:
+            self.monitor = monitor
+        else:
+            self.monitor = None
+
+    def step(self, action):
+        state, reward, done, info = self.env.step(action)
+        if self.monitor:
+            self.monitor.record(state)
+        state = process_frame(state)
+        reward += (info["score"] - self.curr_score) / 40.
+        self.curr_score = info["score"]
+        if done:
+            if info["flag_get"]:
+                reward += 50
+            else:
+                reward -= 50
+        return state, reward / 10., done, info
+
+    def reset(self):
+        self.curr_score = 0
+        return process_frame(self.env.reset())
+
+
+class CustomSkipFrame(Wrapper):
+    def __init__(self, env, skip=4):
+        super(CustomSkipFrame, self).__init__(env)
+        self.observation_space = Box(low=0, high=255, shape=(4, 84, 84))
+        self.skip = skip
+
+    def step(self, action):
+        total_reward = 0
+        states = []
+        state, reward, done, info = self.env.step(action)
+        for i in range(self.skip):
+            if not done:
+                state, reward, done, info = self.env.step(action)
+                total_reward += reward
+                states.append(state)
+            else:
+                states.append(state)
+        states = np.concatenate(states, 0)[None, :, :, :]
+        return states.astype(np.float32), reward, done, info
+
+    def reset(self):
+        state = self.env.reset()
+        states = np.concatenate([state for _ in range(self.skip)], 0)[None, :, :, :]
+        return states.astype(np.float32)
+
+
+def create_train_env(world, stage, action_type, output_path=None):
+    env = gym_super_mario_bros.make("SuperMarioBros-{}-{}-v0".format(world, stage))
+    if output_path:
+        monitor = Monitor(256, 240, output_path)
+    else:
+        monitor = None
+    if action_type == "right":
+        actions = RIGHT_ONLY
+    elif action_type == "simple":
+        actions = SIMPLE_MOVEMENT
+    else:
+        actions = COMPLEX_MOVEMENT
+    env = BinarySpaceToDiscreteSpaceEnv(env, actions)
+    env = CustomReward(env, monitor)
+    env = CustomSkipFrame(env)
+    return env, env.observation_space.shape[0], len(actions)
diff --git a/trainers/a3c_src/model.py b/trainers/a3c_src/model.py
@@ -0,0 +1,70 @@
+"""
+@author: Viet Nguyen <[email protected]>
+"""
+
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class ActorCritic(nn.Module):
+    def __init__(self, num_inputs, num_actions):
+        super(ActorCritic, self).__init__()
+        self.conv1 = nn.Conv2d(num_inputs, 32, 3, stride=2, padding=1)
+        self.conv2 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
+        self.conv3 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
+        self.conv4 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
+        self.lstm = nn.LSTMCell(32 * 6 * 6, 512)
+        self.critic_linear = nn.Linear(512, 1)
+        self.actor_linear = nn.Linear(512, num_actions)
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        for module in self.modules():
+            if isinstance(module, nn.Conv2d) or isinstance(module, nn.Linear):
+                nn.init.xavier_uniform_(module.weight)
+                # nn.init.kaiming_uniform_(module.weight)
+                nn.init.constant_(module.bias, 0)
+            elif isinstance(module, nn.LSTMCell):
+                nn.init.constant_(module.bias_ih, 0)
+                nn.init.constant_(module.bias_hh, 0)
+
+    def forward(self, x, hx, cx):
+        x = F.relu(self.conv1(x))
+        x = F.relu(self.conv2(x))
+        x = F.relu(self.conv3(x))
+        x = F.relu(self.conv4(x))
+        hx, cx = self.lstm(x.view(x.size(0), -1), (hx, cx))
+        return self.actor_linear(hx), self.critic_linear(hx), hx, cx
+
+
+
+class Mapper(nn.Module):
+    def __init__(self, num_inputs):
+        super(Mapper, self).__init__()
+        self.conv1 = nn.Conv2d(num_inputs, 32, 3, stride=2, padding=1)
+        self.conv2 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
+        self.conv3 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
+        self.conv4 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
+        self.lstm = nn.LSTMCell(32 * 6 * 6, 400)
+        self.map_final = nn.Linear(400, 1600)
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        for module in self.modules():
+            if isinstance(module, nn.Conv2d) or isinstance(module, nn.Linear):
+                nn.init.xavier_uniform_(module.weight)
+                # nn.init.kaiming_uniform_(module.weight)
+                nn.init.constant_(module.bias, 0)
+            elif isinstance(module, nn.LSTMCell):
+                nn.init.constant_(module.bias_ih, 0)
+                nn.init.constant_(module.bias_hh, 0)
+
+    def forward(self, x, hx, cx):
+        x = F.relu(self.conv1(x))
+        x = F.relu(self.conv2(x))
+        x = F.relu(self.conv3(x))
+        x = F.relu(self.conv4(x))
+        hx, cx = self.lstm(x.view(x.size(0), -1), (hx, cx))
+        return self.map_final(hx), hx, cx
+
+
diff --git a/trainers/a3c_src/optimizer.py b/trainers/a3c_src/optimizer.py
@@ -0,0 +1,18 @@
+"""
+@author: Viet Nguyen <[email protected]>
+"""
+
+import torch
+
+class GlobalAdam(torch.optim.Adam):
+    def __init__(self, params, lr):
+        super(GlobalAdam, self).__init__(params, lr=lr)
+        for group in self.param_groups:
+            for p in group['params']:
+                state = self.state[p]
+                state['step'] = 0
+                state['exp_avg'] = torch.zeros_like(p.data)
+                state['exp_avg_sq'] = torch.zeros_like(p.data)
+
+                state['exp_avg'].share_memory_()
+                state['exp_avg_sq'].share_memory_()