# models.py
import torch
import torch.nn as nn
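
# This module defines a compact UNet ("ContextUnet") conditioned on a class
# label and a diffusion timestep; the context_mask argument lets individual
# samples have their label blocked, as in classifier-free-guidance-style
# training.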


class ResidualConvBlock(nn.Module):
    def __init__(
        self, in_channels: int, out_channels: int, is_res: bool = False
    ) -> None:
        '''Standard ResNet-style convolutional block.'''
        super().__init__()
        self.same_channels = in_channels == out_channels
        self.is_res = is_res
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, 1, 1),
            nn.BatchNorm2d(out_channels),
            nn.GELU(),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(out_channels, out_channels, 3, 1, 1),
            nn.BatchNorm2d(out_channels),
            nn.GELU(),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if self.is_res:
            x1 = self.conv1(x)
            x2 = self.conv2(x1)
            # add the correct residual in case the channel count has changed
            if self.same_channels:
                out = x + x2
            else:
                out = x1 + x2
            # divide by 1.414 (~sqrt(2)) to keep the variance of the sum roughly constant
            return out / 1.414
        else:
            x1 = self.conv1(x)
            x2 = self.conv2(x1)
            return x2


class UnetDown(nn.Module):
    def __init__(self, in_channels, out_channels):
        '''Process and downscale the image feature maps.'''
        super().__init__()
        layers = [ResidualConvBlock(in_channels, out_channels), nn.MaxPool2d(2)]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)


class UnetUp(nn.Module):
    def __init__(self, in_channels, out_channels):
        '''Process and upscale the image feature maps.'''
        super().__init__()
        layers = [
            nn.ConvTranspose2d(in_channels, out_channels, 2, 2),
            ResidualConvBlock(out_channels, out_channels),
            ResidualConvBlock(out_channels, out_channels),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x, skip):
        # concatenate the skip connection from the down path before upsampling
        x = torch.cat((x, skip), 1)
        x = self.model(x)
        return x


class EmbedFC(nn.Module):
    def __init__(self, input_dim, emb_dim):
        '''Generic one-layer FC NN for embedding things.'''
        super().__init__()
        self.input_dim = input_dim
        self.model = nn.Sequential(
            nn.Linear(input_dim, emb_dim),
            nn.GELU(),
            nn.Linear(emb_dim, emb_dim),
        )

    def forward(self, x):
        x = x.view(-1, self.input_dim)
        return self.model(x)


class ContextUnet(nn.Module):
    def __init__(self, in_channels, n_feat=256, n_classes=10):
        super().__init__()
        self.in_channels = in_channels
        self.n_feat = n_feat
        self.n_classes = n_classes

        self.init_conv = ResidualConvBlock(in_channels, n_feat, is_res=True)
        self.down1 = UnetDown(n_feat, n_feat)
        self.down2 = UnetDown(n_feat, 2 * n_feat)
        # the hard-coded 7 assumes 28x28 inputs: 28 -> 14 -> 7 after two downsamplings
        self.to_vec = nn.Sequential(nn.AvgPool2d(7), nn.GELU())

        self.timeembed1 = EmbedFC(1, 2 * n_feat)
        self.timeembed2 = EmbedFC(1, 1 * n_feat)
        self.contextembed1 = EmbedFC(n_classes, 2 * n_feat)
        self.contextembed2 = EmbedFC(n_classes, 1 * n_feat)

        self.up0 = nn.Sequential(
            # nn.ConvTranspose2d(6 * n_feat, 2 * n_feat, 7, 7), # when concat temb and cemb end up w 6*n_feat
            nn.ConvTranspose2d(2 * n_feat, 2 * n_feat, 7, 7),  # otherwise just have 2*n_feat
            nn.GroupNorm(8, 2 * n_feat),
            nn.ReLU(),
        )
        self.up1 = UnetUp(4 * n_feat, n_feat)
        self.up2 = UnetUp(2 * n_feat, n_feat)

        self.out = nn.Sequential(
            nn.Conv2d(2 * n_feat, n_feat, 3, 1, 1),
            nn.GroupNorm(8, n_feat),
            nn.ReLU(),
            nn.Conv2d(n_feat, self.in_channels, 3, 1, 1),
        )

    def forward(self, x, c, t, context_mask):
        # x is the (noisy) image, c is the context label, t is the timestep,
        # context_mask says which samples to block the context on
        x = self.init_conv(x)
        down1 = self.down1(x)
        down2 = self.down2(down1)
        hiddenvec = self.to_vec(down2)

        # convert context to a one-hot embedding
        c = nn.functional.one_hot(c, num_classes=self.n_classes).type(torch.float)

        # mask out context where context_mask == 1
        context_mask = context_mask[:, None]
        context_mask = context_mask.repeat(1, self.n_classes)
        context_mask = 1 - context_mask  # flip 0 <-> 1: kept labels scale by 1, blocked by 0
        c = c * context_mask

        # embed context and timestep
        cemb1 = self.contextembed1(c).view(-1, self.n_feat * 2, 1, 1)
        temb1 = self.timeembed1(t).view(-1, self.n_feat * 2, 1, 1)
        cemb2 = self.contextembed2(c).view(-1, self.n_feat, 1, 1)
        temb2 = self.timeembed2(t).view(-1, self.n_feat, 1, 1)

        # could concatenate the context embedding here instead of adaGN-style modulation
        # hiddenvec = torch.cat((hiddenvec, temb1, cemb1), 1)

        up1 = self.up0(hiddenvec)
        # up2 = self.up1(up1, down2)  # if want to avoid add and multiply embeddings
        up2 = self.up1(cemb1 * up1 + temb1, down2)  # multiply by context, add timestep
        up3 = self.up2(cemb2 * up2 + temb2, down1)
        out = self.out(torch.cat((up3, x), 1))
        return out
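

if __name__ == "__main__":
    # A minimal smoke-test sketch, assuming 28x28 single-channel (MNIST-like)
    # inputs, which matches the hard-coded 7x7 bottleneck above; the batch
    # size, n_feat, and the random inputs here are arbitrary choices, not
    # values prescribed by the model definitions.
    batch, n_feat, n_classes = 4, 64, 10
    model = ContextUnet(in_channels=1, n_feat=n_feat, n_classes=n_classes)

    x = torch.randn(batch, 1, 28, 28)                  # noisy images
    c = torch.randint(0, n_classes, (batch,))          # class labels
    t = torch.rand(batch, 1)                           # timesteps scaled to [0, 1)
    mask = torch.bernoulli(torch.full((batch,), 0.1))  # 1 = block the label

    out = model(x, c, t, mask)
    assert out.shape == x.shape
    print("output shape:", tuple(out.shape))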