cutalion
diff --git a/‎bot/models/bot.onnx‎
19 Bytes b/‎bot/models/bot.onnx‎
19 Bytes
diff --git a/‎bot/models/bot.onnx.data‎
0 Bytes b/‎bot/models/bot.onnx.data‎
0 Bytes
diff --git a/‎bot/models/bot_easy.onnx‎
-6 Bytes b/‎bot/models/bot_easy.onnx‎
-6 Bytes
diff --git a/‎bot/models/bot_easy.onnx.data‎
0 Bytes b/‎bot/models/bot_easy.onnx.data‎
0 Bytes
diff --git a/‎bot/models/bot_hard.onnx‎
-6 Bytes b/‎bot/models/bot_hard.onnx‎
-6 Bytes
diff --git a/‎bot/models/bot_hard.onnx.data‎
0 Bytes b/‎bot/models/bot_hard.onnx.data‎
0 Bytes
diff --git a/‎bot/models/bot_medium.onnx‎
-6 Bytes b/‎bot/models/bot_medium.onnx‎
-6 Bytes
diff --git a/‎bot/models/bot_medium.onnx.data‎
0 Bytes b/‎bot/models/bot_medium.onnx.data‎
0 Bytes
diff --git a/‎bot/training/export.py‎
Lines changed: 2 additions & 2 deletions b/‎bot/training/export.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎bot/training/model.py‎
Lines changed: 39 additions & 14 deletions b/‎bot/training/model.py‎
Lines changed: 39 additions & 14 deletions
@@ -11,8 +11,8 @@
 from model import PPONet
 
 
-def export_onnx(checkpoint_path: str, output_path: str):
-    net = PPONet()
+def export_onnx(checkpoint_path: str, output_path: str, filters: int = 64, num_res_blocks: int = 0):
+    net = PPONet(filters=filters, num_res_blocks=num_res_blocks)
     checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
     net.load_state_dict(checkpoint["model_state_dict"])
     net.eval()
 
@@ -2,33 +2,58 @@
 
 Architecture (from research.md):
 - Input: (batch, 19, 4, 4) state tensor
-- Body: Conv2d(19→64, 3x3, pad=1) → ReLU → Conv2d(64→64, 3x3, pad=1) → ReLU → Flatten
-- Actor head: Linear(1024→320) → action logits
-- Critic head: Linear(1024→1) → state value
+- Body: Conv2d(19→F, 3x3, pad=1) → ReLU → [N residual blocks, or Conv2d(F→F, 3x3, pad=1) → ReLU when N=0] → Flatten
+- Actor head: Linear(F*16→320) → action logits
+- Critic head: Linear(F*16→1) → state value
+
+Default (small): F=64, 0 residual blocks (~380K params)
+Large: F=128, 4 residual blocks (~2M params)
 """
 
 import torch
 import torch.nn as nn
 from env import ACTION_SPACE_SIZE, BOARD_SIZE, NUM_CHANNELS
 
 
+class ResBlock(nn.Module):
+    """Residual block: conv → BN → ReLU → conv → BN → skip add → ReLU."""
+
+    def __init__(self, filters: int):
+        super().__init__()
+        self.conv1 = nn.Conv2d(filters, filters, kernel_size=3, padding=1)
+        self.bn1 = nn.BatchNorm2d(filters)
+        self.conv2 = nn.Conv2d(filters, filters, kernel_size=3, padding=1)
+        self.bn2 = nn.BatchNorm2d(filters)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        residual = x
+        out = torch.relu(self.bn1(self.conv1(x)))
+        out = self.bn2(self.conv2(out))
+        return torch.relu(out + residual)
+
+
 class PPONet(nn.Module):
     """Actor-critic network for PPO self-play training."""
 
-    def __init__(self):
+    def __init__(self, filters: int = 64, num_res_blocks: int = 0):
         super().__init__()
-        filters_num = 64
-        output_size = filters_num * BOARD_SIZE * BOARD_SIZE
+        output_size = filters * BOARD_SIZE * BOARD_SIZE
 
-        self.body = nn.Sequential(
-            nn.Conv2d(
-                NUM_CHANNELS, filters_num, kernel_size=3, padding=1
-            ),  # input layer
-            nn.ReLU(),
-            nn.Conv2d(filters_num, filters_num, kernel_size=3, padding=1),
+        layers = [
+            nn.Conv2d(NUM_CHANNELS, filters, kernel_size=3, padding=1),
             nn.ReLU(),
-            nn.Flatten(),  # gives (batch, filters_num * BOARD_SIZE * BOARD_SIZE) = 64 * 4 * 4 = 1024
-        )
+        ]
+
+        if num_res_blocks > 0:
+            for _ in range(num_res_blocks):
+                layers.append(ResBlock(filters))
+        else:
+            # Original architecture: second conv + ReLU
+            layers.append(nn.Conv2d(filters, filters, kernel_size=3, padding=1))
+            layers.append(nn.ReLU())
+
+        layers.append(nn.Flatten())
+        self.body = nn.Sequential(*layers)
 
         self.actor_head = nn.Linear(output_size, ACTION_SPACE_SIZE)
         self.critic_head = nn.Linear(output_size, 1)