Skip to content

Commit 442a912

Browse files
authored
Merge pull request #1 from cutalion/002-rl-bot-training
Faster training + retrained bot models
2 parents ba45f53 + 43bbd25 commit 442a912

13 files changed

Lines changed: 152 additions & 63 deletions

bot/models/bot.onnx

19 Bytes
Binary file not shown.

bot/models/bot.onnx.data

0 Bytes
Binary file not shown.

bot/models/bot_easy.onnx

-6 Bytes
Binary file not shown.

bot/models/bot_easy.onnx.data

0 Bytes
Binary file not shown.

bot/models/bot_hard.onnx

-6 Bytes
Binary file not shown.

bot/models/bot_hard.onnx.data

0 Bytes
Binary file not shown.

bot/models/bot_medium.onnx

-6 Bytes
Binary file not shown.

bot/models/bot_medium.onnx.data

0 Bytes
Binary file not shown.

bot/training/export.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
from model import PPONet
1212

1313

14-
def export_onnx(checkpoint_path: str, output_path: str):
15-
net = PPONet()
14+
def export_onnx(checkpoint_path: str, output_path: str, filters: int = 64, num_res_blocks: int = 0):
15+
net = PPONet(filters=filters, num_res_blocks=num_res_blocks)
1616
checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
1717
net.load_state_dict(checkpoint["model_state_dict"])
1818
net.eval()

bot/training/model.py

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,33 +2,58 @@
22
33
Architecture (from research.md):
44
- Input: (batch, 19, 4, 4) state tensor
5-
- Body: Conv2d(19→64, 3x3, pad=1) → ReLU → Conv2d(64→64, 3x3, pad=1) → ReLU → Flatten
6-
- Actor head: Linear(1024→320) → action logits
7-
- Critic head: Linear(1024→1) → state value
5+
- Body: Conv2d(19→F, 3x3, pad=1) → ReLU → [N residual blocks, or Conv2d(F→F, 3x3, pad=1) → ReLU when N=0] → Flatten
6+
- Actor head: Linear(F*16→320) → action logits
7+
- Critic head: Linear(F*16→1) → state value
8+
9+
Default (small): F=64, 0 residual blocks (~380K params)
10+
Large: F=128, 4 residual blocks (~2M params)
811
"""
912

1013
import torch
1114
import torch.nn as nn
1215
from env import ACTION_SPACE_SIZE, BOARD_SIZE, NUM_CHANNELS
1316

1417

18+
class ResBlock(nn.Module):
19+
"""Residual block: conv → BN → ReLU → conv → BN → skip add → ReLU."""
20+
21+
def __init__(self, filters: int):
22+
super().__init__()
23+
self.conv1 = nn.Conv2d(filters, filters, kernel_size=3, padding=1)
24+
self.bn1 = nn.BatchNorm2d(filters)
25+
self.conv2 = nn.Conv2d(filters, filters, kernel_size=3, padding=1)
26+
self.bn2 = nn.BatchNorm2d(filters)
27+
28+
def forward(self, x: torch.Tensor) -> torch.Tensor:
29+
residual = x
30+
out = torch.relu(self.bn1(self.conv1(x)))
31+
out = self.bn2(self.conv2(out))
32+
return torch.relu(out + residual)
33+
34+
1535
class PPONet(nn.Module):
1636
"""Actor-critic network for PPO self-play training."""
1737

18-
def __init__(self):
38+
def __init__(self, filters: int = 64, num_res_blocks: int = 0):
1939
super().__init__()
20-
filters_num = 64
21-
output_size = filters_num * BOARD_SIZE * BOARD_SIZE
40+
output_size = filters * BOARD_SIZE * BOARD_SIZE
2241

23-
self.body = nn.Sequential(
24-
nn.Conv2d(
25-
NUM_CHANNELS, filters_num, kernel_size=3, padding=1
26-
), # input layer
27-
nn.ReLU(),
28-
nn.Conv2d(filters_num, filters_num, kernel_size=3, padding=1),
42+
layers = [
43+
nn.Conv2d(NUM_CHANNELS, filters, kernel_size=3, padding=1),
2944
nn.ReLU(),
30-
nn.Flatten(), # gives (batch, filters_num * BOARD_SIZE * BOARD_SIZE) = 64 * 4 * 4 = 1024
31-
)
45+
]
46+
47+
if num_res_blocks > 0:
48+
for _ in range(num_res_blocks):
49+
layers.append(ResBlock(filters))
50+
else:
51+
# Original architecture: second conv + ReLU
52+
layers.append(nn.Conv2d(filters, filters, kernel_size=3, padding=1))
53+
layers.append(nn.ReLU())
54+
55+
layers.append(nn.Flatten())
56+
self.body = nn.Sequential(*layers)
3257

3358
self.actor_head = nn.Linear(output_size, ACTION_SPACE_SIZE)
3459
self.critic_head = nn.Linear(output_size, 1)

0 commit comments

Comments
 (0)