Support an option to randomly disable communication during training

acforvs · acforvs · commit 588aa7833f83 · 2023-03-13T01:18:50.000+04:00
diff --git a/config.yaml b/config.yaml
@@ -6,11 +6,13 @@ dhc:
   hidden_dim: 256
   max_comm_agents: 3 # includes the agent itself
   batch_size: 192
-  max_num_agents: 12
+  max_num_agents: 16
   latent_dim: 784  # 16 * 7 * 7, do not forget to change if the observation_shape is changed
   max_episode_length: 256
 
   communication:
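+    disable_communication: 1  # if truthy, the comm layer is skipped at random (see comm_enabled_prob) rather than turned off entirely
+    comm_enabled_prob: 0.7  # probability that the comm layer is applied when disable_communication is truthy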
     num_comm_layers: 2
     num_comm_heads: 2
 
diff --git a/pathfinding/models/dhc/model.py b/pathfinding/models/dhc/model.py
@@ -1,4 +1,5 @@
 # credits to https://github.com/ZiyuanMa/DHC/blob/master/model.py
+import random
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
@@ -269,7 +270,12 @@ def forward(self, obs, steps, hidden, comm_mask):
             # hidden size: batch_size*num_agents x self.hidden_dim
             hidden = self.recurrent(latent[i], hidden)
             hidden = hidden.view(self._batch_size, num_agents, self.hidden_dim)
-            hidden = self.comm(hidden, comm_mask[:, i])
+
+            if DHC_CONFIG["communication"]["disable_communication"]:
+                if random.random() < DHC_CONFIG["communication"]["comm_enabled_prob"]:
+                    hidden = self.comm(hidden, comm_mask[:, i])
+            else:
+                hidden = self.comm(hidden, comm_mask[:, i])
             # only hidden from agent 0
             hidden_buffer.append(hidden[:, 0])
             hidden = hidden.view(self._batch_size * num_agents, self.hidden_dim)
diff --git a/pathfinding/models/dhc/train.py b/pathfinding/models/dhc/train.py
@@ -9,9 +9,9 @@
 
 TRAIN_CONFIG = settings["dhc"]["train"]
 
-torch.manual_seed(239)
-np.random.seed(239)
-random.seed(239)
+torch.manual_seed(0)
+np.random.seed(0)
+random.seed(0)
 
 
 def main(
diff --git a/pathfinding/models/dhc/worker.py b/pathfinding/models/dhc/worker.py
@@ -371,7 +371,7 @@ def stats(self, interval: int):
                     self.stat_dict[add_agent_key] = []
 
                 if key[1] < WRK_CONFIG["max_map_length"]:
-                    add_map_key = (key[0], key[1] + 5)
+                    add_map_key = (key[0], key[1] + 10)
                     if add_map_key not in self.stat_dict:
                         self.stat_dict[add_map_key] = []