|
2 | 2 |
|
3 | 3 | Architecture (from research.md): |
4 | 4 | - Input: (batch, 19, 4, 4) state tensor |
5 | | -- Body: Conv2d(19→64, 3x3, pad=1) → ReLU → Conv2d(64→64, 3x3, pad=1) → ReLU → Flatten |
6 | | -- Actor head: Linear(1024→320) → action logits |
7 | | -- Critic head: Linear(1024→1) → state value |
| 5 | +- Body: Conv2d(19→F, 3x3, pad=1) → ReLU → [N residual blocks, or Conv2d(F→F, 3x3, pad=1) → ReLU when N=0] → Flatten |
| 6 | +- Actor head: Linear(F*16→320) → action logits |
| 7 | +- Critic head: Linear(F*16→1) → state value |
| 8 | +
|
| 9 | +Default (small): F=64, 0 residual blocks (~380K params) |
| 10 | +Large: F=128, 4 residual blocks (~2M params) |
8 | 11 | """ |
9 | 12 |
|
10 | 13 | import torch |
11 | 14 | import torch.nn as nn |
12 | 15 | from env import ACTION_SPACE_SIZE, BOARD_SIZE, NUM_CHANNELS |
13 | 16 |
|
14 | 17 |
|
class ResBlock(nn.Module):
    """Two-convolution residual unit with an identity shortcut.

    Data flow: conv → batch norm → ReLU → conv → batch norm, then the
    untouched input is added back and the sum goes through a final ReLU.
    Both convolutions map ``filters`` channels to ``filters`` channels with a
    3x3 kernel and padding 1.

    Args:
        filters: Number of input and output channels of the block.
    """

    def __init__(self, filters: int):
        super().__init__()
        # Shared settings for both convolutions.
        conv_kwargs = {"kernel_size": 3, "padding": 1}
        self.conv1 = nn.Conv2d(filters, filters, **conv_kwargs)
        self.bn1 = nn.BatchNorm2d(filters)
        self.conv2 = nn.Conv2d(filters, filters, **conv_kwargs)
        self.bn2 = nn.BatchNorm2d(filters)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the residual transformation to ``x`` and return the result."""
        # First stage: convolution, normalisation, non-linearity.
        hidden = self.conv1(x)
        hidden = self.bn1(hidden)
        hidden = torch.relu(hidden)
        # Second stage has no activation of its own; ReLU is applied only
        # after the shortcut has been added.
        hidden = self.bn2(self.conv2(hidden))
        return torch.relu(hidden + x)
| 33 | + |
| 34 | + |
class PPONet(nn.Module):
    """Actor-critic network for PPO self-play training.

    A shared convolutional body is followed by two linear heads:
    ``actor_head`` with ``ACTION_SPACE_SIZE`` outputs and ``critic_head``
    with a single output.

    Args:
        filters: Channel width used by every convolution in the body.
        num_res_blocks: How many ``ResBlock`` modules follow the input
            convolution. If this is not positive, one plain
            Conv2d → ReLU pair is used instead (the original architecture).
    """

    def __init__(self, filters: int = 64, num_res_blocks: int = 0):
        super().__init__()

        # Input stem: NUM_CHANNELS → filters.
        stem = [
            nn.Conv2d(NUM_CHANNELS, filters, kernel_size=3, padding=1),
            nn.ReLU(),
        ]

        # Trunk: either a stack of residual blocks or the original
        # second convolution + ReLU.
        if num_res_blocks > 0:
            trunk = [ResBlock(filters) for _ in range(num_res_blocks)]
        else:
            trunk = [
                nn.Conv2d(filters, filters, kernel_size=3, padding=1),
                nn.ReLU(),
            ]

        self.body = nn.Sequential(*stem, *trunk, nn.Flatten())

        # Size of the flattened body output; assumes the input's spatial
        # dimensions are BOARD_SIZE x BOARD_SIZE (confirm against the caller).
        flat_features = filters * BOARD_SIZE * BOARD_SIZE
        self.actor_head = nn.Linear(flat_features, ACTION_SPACE_SIZE)
        self.critic_head = nn.Linear(flat_features, 1)
|
0 commit comments