
Commit 160bba6

Author: lucasdavid

Add RetinaNet Pascal VOC/2012

1 parent b02f285 · commit 160bba6

File tree: 13 files changed, +4863 −10 lines


.gitignore (+2 −1)

```diff
@@ -117,6 +117,8 @@ venv.bak/
 # Rope project settings
 .ropeproject
 
+.idea/
+
 # mkdocs documentation
 /site
 
@@ -132,4 +134,3 @@ data/
 logs/
 secrets/
 datasets
-
```
README.md (+13 −7)

````diff
@@ -1,12 +1,6 @@
 # [![Tensorflow Logo](data/tf.png)](https://tensorflow.org) Algorithms in Tensorflow
 
-Experiments with tensorflow 2.0, gpu support,
-persistent logging and stable docker env.
-
-## Usage
-```shell
-./actions/run.sh {up,down,build}
-```
+Experiments with tensorflow 2.0, gpu support, persistent logging and stable docker env.
 
 ## Summary
 ### Supervised
@@ -42,3 +36,15 @@ persistent logging and stable docker env.
 | [Guided Gradient Backpropagation](notebooks/explaining/saliency-gradient-backpropagation-guided.ipynb) | saliency | Gradient-based explaining method considering positive intermediate gradients | [article](https://arxiv.org/pdf/1412.6806.pdf) |
 | [Smooth Gradient Backpropagation](notebooks/explaining/saliency-gradient-backpropagation-smooth.ipynb) | saliency | Gradient-based explaining method with local-level gradient correction | [article](https://arxiv.org/pdf/1706.03825.pdf) |
 | [Full Gradient Representation](notebooks/explaining/saliency-gradient-backpropagation-full.ipynb) | saliency | Explaining using function linearization with gradient-based and bias information | [article](https://arxiv.org/pdf/1905.00780.pdf) |
+
+
+## Usage
+Code in this repository is kept inside Jupyter notebooks, so any Jupyter
+server will do. I added a docker-compose env to simplify things, which can
+be used as follows:
+```shell
+./actions/run.sh                                          # start jupyter notebook
+./actions/run.sh {up,down,build}                          # more compose commands
+./actions/run.sh exec experiments python path/to/file.py  # any commands, really
+./actions/run.sh tensorboard                              # start tensorboard
+```
````

actions/run.sh (+1 −1)

```diff
@@ -5,7 +5,7 @@ run()
   CMD=${1:-up}
 
   if [[ "$CMD" == "up" ]]; then
-    docker-compose --env-file config/.env -f config/docker-compose.yml up -d
+    docker-compose --env-file config/.env -f config/docker-compose.yml up
   elif [[ "$CMD" == "tensorboard" ]]; then
     docker-compose --env-file config/.env -f config/docker-compose.yml \
       exec experiments \
```

notebooks/supervised/detection/config/__init__.py

Whitespace-only changes.
New file (+228 lines) — data augmentation, resizing and preprocessing, followed by anchor generation and label encoding:

```python
import tensorflow as tf

from .metrics import bbox_iou
from .utils import swap_xy, convert_to_xywh


def random_flip_horizontal(image, boxes):
    if tf.random.uniform(()) > 0.5:
        image = tf.image.flip_left_right(image)
        boxes = tf.stack([1 - boxes[:, 2], boxes[:, 1], 1 - boxes[:, 0], boxes[:, 3]], axis=-1)
    return image, boxes


def resize_and_pad_image(image, min_side=800.0, max_side=1333.0, jitter=(640, 1024), stride=128.0):
    image_shape = tf.cast(tf.shape(image)[:2], dtype=tf.float32)
    if jitter is not None:
        min_side = tf.random.uniform((), jitter[0], jitter[1], dtype=tf.float32)
    ratio = min_side / tf.reduce_min(image_shape)
    if ratio * tf.reduce_max(image_shape) > max_side:
        ratio = max_side / tf.reduce_max(image_shape)
    image_shape = ratio * image_shape
    image = tf.image.resize(image, tf.cast(image_shape, dtype=tf.int32))
    padded_image_shape = tf.cast(
        tf.math.ceil(image_shape / stride) * stride, dtype=tf.int32
    )
    image = tf.image.pad_to_bounding_box(
        image, 0, 0, padded_image_shape[0], padded_image_shape[1]
    )
    return image, image_shape, ratio


def preprocess_data(sample):
    """Applies the preprocessing steps to a single sample."""
    image = sample["image"]
    bbox = swap_xy(sample["objects"]["bbox"])
    class_id = tf.cast(sample["objects"]["label"], dtype=tf.int32)

    image, bbox = random_flip_horizontal(image, bbox)
    image, image_shape, _ = resize_and_pad_image(image)

    bbox = tf.stack(
        [
            bbox[:, 0] * image_shape[1],
            bbox[:, 1] * image_shape[0],
            bbox[:, 2] * image_shape[1],
            bbox[:, 3] * image_shape[0],
        ],
        axis=-1,
    )
    bbox = convert_to_xywh(bbox)
    return image, bbox, class_id
```
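For a quick sanity check of the resizing step, a minimal sketch assuming eager execution; `jitter=None` makes the result deterministic:

```python
image = tf.zeros([375, 500, 3])  # a dummy, roughly VOC-sized image
image, image_shape, ratio = resize_and_pad_image(image, jitter=None)

# min_side=800 gives ratio = 800/375 ≈ 2.13, so the image is resized to
# about 800x1066 and both sides are padded up to a multiple of stride=128.
print(image.shape)  # (896, 1152, 3)
```

The same file continues with the anchor generation: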
```python
class AnchorBox:
    """Generates anchor boxes.

    This class has operations to generate anchor boxes for feature maps at
    strides `[8, 16, 32, 64, 128]`. Each anchor box is of the format
    `[x, y, width, height]`.

    Attributes:
      aspect_ratios: A list of float values representing the aspect ratios of
        the anchor boxes at each location on the feature map.
      scales: A list of float values representing the scale of the anchor boxes
        at each location on the feature map.
      num_anchors: The number of anchor boxes at each location on the feature map.
      areas: A list of float values representing the areas of the anchor
        boxes for each feature map in the feature pyramid.
      strides: A list of float values representing the strides for each feature
        map in the feature pyramid.
    """

    def __init__(self):
        self.aspect_ratios = [0.5, 1.0, 2.0]
        self.scales = [2 ** x for x in [0, 1 / 3, 2 / 3]]

        self._num_anchors = len(self.aspect_ratios) * len(self.scales)
        self._strides = [2 ** i for i in range(3, 8)]
        self._areas = [x ** 2 for x in [32.0, 64.0, 128.0, 256.0, 512.0]]
        self._anchor_dims = self._compute_dims()

    def _compute_dims(self):
        anchor_dims_all = []
        for area in self._areas:
            anchor_dims = []
            for ratio in self.aspect_ratios:
                anchor_height = tf.math.sqrt(area / ratio)
                anchor_width = area / anchor_height
                dims = tf.convert_to_tensor([[[anchor_width, anchor_height]]])

                for scale in self.scales:
                    anchor_dims.append(scale * dims)
            anchor_dims = tf.stack(anchor_dims, axis=-2)
            anchor_dims_all.append(anchor_dims)
        return anchor_dims_all

    def _get_anchors(self, feature_height, feature_width, level):
        """Generates anchor boxes for a given feature map size and level.

        Arguments:
          feature_height: An integer representing the height of the feature map.
          feature_width: An integer representing the width of the feature map.
          level: An integer representing the level of the feature map in the
            feature pyramid.

        Returns:
          anchor boxes with the shape
          `(feature_height * feature_width * num_anchors, 4)`
        """
        rx = tf.range(feature_width, dtype=tf.float32) + 0.5
        ry = tf.range(feature_height, dtype=tf.float32) + 0.5
        centers = tf.stack(tf.meshgrid(rx, ry), axis=-1) * self._strides[level - 3]
        centers = tf.expand_dims(centers, axis=-2)
        centers = tf.tile(centers, [1, 1, self._num_anchors, 1])
        dims = tf.tile(
            self._anchor_dims[level - 3], [feature_height, feature_width, 1, 1]
        )
        anchors = tf.concat([centers, dims], axis=-1)
        return tf.reshape(
            anchors, [feature_height * feature_width * self._num_anchors, 4]
        )

    def get_anchors(self, image_height, image_width):
        anchors = [
            self._get_anchors(
                tf.math.ceil(image_height / 2 ** i),
                tf.math.ceil(image_width / 2 ** i),
                i,
            )
            for i in range(3, 8)
        ]
        return tf.concat(anchors, axis=0)
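```

As a sanity check on the pyramid layout (a sketch, assuming eager execution): for a 512×512 input, levels 3–7 contribute 64², 32², 16², 8² and 4² locations, each carrying 9 anchors:

```python
anchor_box = AnchorBox()
anchors = anchor_box.get_anchors(512, 512)

# (64*64 + 32*32 + 16*16 + 8*8 + 4*4) * 9 = 49,104 anchors of [x, y, w, h].
print(anchors.shape)  # (49104, 4)
```

The file closes with the label encoder: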
```python
class LabelEncoder:
    def __init__(self, preprocessing_fn):
        self.preprocessing_fn = preprocessing_fn

        self._anchor_box = AnchorBox()

        # The scaling factors used to scale the bounding box targets.
        self._box_variance = tf.convert_to_tensor(
            [0.1, 0.1, 0.2, 0.2], dtype=tf.float32
        )

    def _match_anchor_boxes(
        self, anchor_boxes, gt_boxes, match_iou=0.5, ignore_iou=0.4
    ):
        """Matches ground truth boxes to anchor boxes based on IOU.

        1. Calculates the pairwise IOU for the M `anchor_boxes` and N `gt_boxes`
           to get a `(M, N)` shaped matrix.
        2. The ground truth box with the maximum IOU in each row is assigned to
           the anchor box provided the IOU is greater than `match_iou`.
        3. If the maximum IOU in a row is less than `ignore_iou`, the anchor
           box is assigned with the background class.
        4. The remaining anchor boxes that do not have any class assigned are
           ignored during training.

        Arguments:
          anchor_boxes: `(total_anchors, 4)`, `[x, y, width, height]`
          gt_boxes: `(num_objects, 4)`, `[x, y, width, height]`
          match_iou: A float value representing the minimum IOU threshold for
            determining if a ground truth box can be assigned to an anchor box.
          ignore_iou: A float value representing the IOU threshold under which
            an anchor box is assigned to the background class.

        Returns:
          matched_gt_idx: Index of the matched object.
          positive_mask: A mask for anchor boxes that have been assigned ground
            truth boxes.
          ignore_mask: A mask for anchor boxes that need to be ignored during
            training.
        """
        iou_matrix = bbox_iou(anchor_boxes, gt_boxes)
        max_iou = tf.reduce_max(iou_matrix, axis=1)
        matched_gt_idx = tf.argmax(iou_matrix, axis=1)
        positive_mask = tf.greater_equal(max_iou, match_iou)
        negative_mask = tf.less(max_iou, ignore_iou)
        ignore_mask = tf.logical_not(tf.logical_or(positive_mask, negative_mask))
        return (
            matched_gt_idx,
            tf.cast(positive_mask, dtype=tf.float32),
            tf.cast(ignore_mask, dtype=tf.float32),
        )

    def _compute_box_target(self, anchor_boxes, matched_gt_boxes):
        """Transforms the ground truth boxes into targets for training."""
        box_target = tf.concat(
            [
                (matched_gt_boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:],
                tf.math.log(matched_gt_boxes[:, 2:] / anchor_boxes[:, 2:]),
            ],
            axis=-1,
        )
        box_target = box_target / self._box_variance
        return box_target

    def _encode_sample(self, image_shape, gt_boxes, cls_ids):
        anchor_boxes = self._anchor_box.get_anchors(image_shape[1], image_shape[2])
        cls_ids = tf.cast(cls_ids, dtype=tf.float32)
        matched_gt_idx, positive_mask, ignore_mask = self._match_anchor_boxes(
            anchor_boxes, gt_boxes
        )
        matched_gt_boxes = tf.gather(gt_boxes, matched_gt_idx)
        box_target = self._compute_box_target(anchor_boxes, matched_gt_boxes)
        matched_gt_cls_ids = tf.gather(cls_ids, matched_gt_idx)
        cls_target = tf.where(
            tf.not_equal(positive_mask, 1.0), -1.0, matched_gt_cls_ids
        )
        cls_target = tf.where(tf.equal(ignore_mask, 1.0), -2.0, cls_target)
        cls_target = tf.expand_dims(cls_target, axis=-1)
        label = tf.concat([box_target, cls_target], axis=-1)
        return label

    def encode_batch(self, batch_images, gt_boxes, cls_ids):
        """Creates box and classification targets for a batch."""
        images_shape = tf.shape(batch_images)
        batch_size = images_shape[0]

        labels = tf.TensorArray(dtype=tf.float32, size=batch_size, dynamic_size=True)
        for i in range(batch_size):
            label = self._encode_sample(images_shape, gt_boxes[i], cls_ids[i])
            labels = labels.write(i, label)
        batch_images = self.preprocessing_fn(batch_images)
        return batch_images, labels.stack()
```
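A sketch of how the encoder might slot into an input pipeline. Only `preprocess_data` and `LabelEncoder` come from this file; the TFDS dataset name matches the commit title, while the padding values and the ResNet preprocessing function are assumptions:

```python
import tensorflow as tf
import tensorflow_datasets as tfds

encoder = LabelEncoder(tf.keras.applications.resnet.preprocess_input)

train = tfds.load('voc/2012', split='train')
train = (train
         .map(preprocess_data, num_parallel_calls=tf.data.experimental.AUTOTUNE)
         .padded_batch(2, padding_values=(0.0, 1e-8, -1), drop_remainder=True)
         .map(encoder.encode_batch, num_parallel_calls=tf.data.experimental.AUTOTUNE))
```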
New file (+18 lines) — pairwise IoU between two sets of boxes in `[x, y, width, height]` format:

```python
import tensorflow as tf

from .utils import convert_to_corners


def bbox_iou(b1, b2):
    c1 = convert_to_corners(b1)
    c2 = convert_to_corners(b2)
    lu = tf.maximum(c1[:, None, :2], c2[:, :2])
    rd = tf.minimum(c1[:, None, 2:], c2[:, 2:])
    i = tf.maximum(0.0, rd - lu)
    i_area = i[:, :, 0] * i[:, :, 1]
    b1_area = b1[:, 2] * b1[:, 3]
    b2_area = b2[:, 2] * b2[:, 3]
    union_area = tf.maximum(
        b1_area[:, None] + b2_area - i_area, 1e-8
    )
    return tf.clip_by_value(i_area / union_area, 0.0, 1.0)
```
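For intuition, a worked example; the import path is hypothetical, and it assumes `convert_to_corners` maps `[x, y, w, h]` to `[xmin, ymin, xmax, ymax]`:

```python
import tensorflow as tf
from metrics import bbox_iou  # hypothetical import path

b1 = tf.constant([[0.5, 0.5, 1.0, 1.0]])  # unit square centered at (0.5, 0.5)
b2 = tf.constant([[1.0, 0.5, 1.0, 1.0]])  # same square, shifted right by 0.5

# intersection 0.5, union 1 + 1 - 0.5 = 1.5, so IoU = 1/3
print(bbox_iou(b1, b2).numpy())  # [[0.33333334]]
```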
New file (+95 lines) — the ResNet50 backbone, the feature pyramid and the RetinaNet model (the `Activation` import is moved off the private `tensorflow.python` path):

```python
import numpy as np
import tensorflow as tf
from tensorflow.keras import Input, Model, Sequential
from tensorflow.keras.layers import Activation, Conv2D, Layer, UpSampling2D


def get_backbone(layers=('conv3_block4_out', 'conv4_block6_out', 'conv5_block3_out')):
    resnet50 = tf.keras.applications.ResNet50(include_top=False, input_shape=[None, None, 3])

    return Model(inputs=[resnet50.inputs],
                 outputs=[resnet50.get_layer(l).output for l in layers],
                 name='resnet50_backbone')


class FeaturePyramid(Layer):
    def __init__(self, backbone=None, **kwargs):
        super(FeaturePyramid, self).__init__(name='FeaturePyramid', **kwargs)
        self.backbone = backbone if backbone else get_backbone()
        self.conv_c3_1x1 = Conv2D(256, 1, 1, 'same')
        self.conv_c4_1x1 = Conv2D(256, 1, 1, 'same')
        self.conv_c5_1x1 = Conv2D(256, 1, 1, 'same')
        self.conv_c3_3x3 = Conv2D(256, 3, 1, 'same')
        self.conv_c4_3x3 = Conv2D(256, 3, 1, 'same')
        self.conv_c5_3x3 = Conv2D(256, 3, 1, 'same')
        self.conv_c6_3x3 = Conv2D(256, 3, 2, 'same')
        self.conv_c7_3x3 = Conv2D(256, 3, 2, 'same')
        self.upsample_2x = UpSampling2D(2)

    def call(self, images, training=False):
        c3_output, c4_output, c5_output = self.backbone(images, training=training)
        p3_output = self.conv_c3_1x1(c3_output)
        p4_output = self.conv_c4_1x1(c4_output)
        p5_output = self.conv_c5_1x1(c5_output)
        p4_output = p4_output + self.upsample_2x(p5_output)
        p3_output = p3_output + self.upsample_2x(p4_output)
        p3_output = self.conv_c3_3x3(p3_output)
        p4_output = self.conv_c4_3x3(p4_output)
        p5_output = self.conv_c5_3x3(p5_output)
        p6_output = self.conv_c6_3x3(c5_output)
        p7_output = self.conv_c7_3x3(tf.nn.relu(p6_output))
        return p3_output, p4_output, p5_output, p6_output, p7_output
```
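A minimal shape sketch (it assumes the input sides are multiples of 128, which `resize_and_pad_image` guarantees, and downloads ImageNet weights on first use):

```python
fpn = FeaturePyramid()
features = fpn(tf.zeros([1, 512, 512, 3]))

# strides 8, 16, 32, 64 and 128 yield maps of 64, 32, 16, 8 and 4 cells,
# all with 256 channels.
print([f.shape.as_list() for f in features])
# [[1, 64, 64, 256], [1, 32, 32, 256], ..., [1, 4, 4, 256]]
```

The prediction heads come next: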
```python
def build_head(output_filters, bias_init):
    head = Sequential([Input(shape=[None, None, 256])], name='images')
    rn_i = tf.initializers.RandomNormal(0.0, 0.01)

    for _ in range(4):
        head.add(Conv2D(256, 3, padding="same", kernel_initializer=rn_i))
        head.add(Activation('relu'))
    head.add(
        Conv2D(
            output_filters,
            3,
            1,
            padding="same",
            kernel_initializer=rn_i,
            bias_initializer=bias_init,
        )
    )
    return head
```
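Each head maps a 256-channel pyramid feature to per-anchor predictions. For example, a classification head for the 20 Pascal VOC classes (a sketch; the `'zeros'` bias is illustrative, the real classification head uses the focal-loss prior below):

```python
head = build_head(9 * 20, 'zeros')  # 9 anchors x 20 classes per cell
print(head(tf.zeros([1, 64, 64, 256])).shape)  # (1, 64, 64, 180)
```

Finally, the model class ties the pyramid and the heads together: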
```python
class RetinaNet(Model):
    """A subclassed Keras model implementing the RetinaNet architecture.

    Attributes:
      num_classes: Number of classes in the dataset.
      backbone: The backbone to build the feature pyramid from.
        Currently supports ResNet50 only.
    """

    def __init__(self, num_classes, backbone=None, **kwargs):
        super(RetinaNet, self).__init__(name="RetinaNet", **kwargs)
        self.fpn = FeaturePyramid(backbone)
        self.num_classes = num_classes

        prior_probability = tf.constant_initializer(-np.log((1 - 0.01) / 0.01))
        self.cls_head = build_head(9 * num_classes, prior_probability)
        self.box_head = build_head(9 * 4, "zeros")

    def call(self, image, training=False):
        features = self.fpn(image, training=training)
        N = tf.shape(image)[0]
        cls_outputs = []
        box_outputs = []
        for feature in features:
            box_outputs.append(tf.reshape(self.box_head(feature), [N, -1, 4]))
            cls_outputs.append(
                tf.reshape(self.cls_head(feature), [N, -1, self.num_classes])
            )
        cls_outputs = tf.concat(cls_outputs, axis=1)
        box_outputs = tf.concat(box_outputs, axis=1)
        return tf.concat([box_outputs, cls_outputs], axis=-1)
```
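And a forward-pass sketch tying it all together; 20 classes is an assumption matching Pascal VOC:

```python
model = RetinaNet(num_classes=20)
outputs = model(tf.zeros([2, 512, 512, 3]))

# 49,104 anchors for a 512x512 input (see AnchorBox above), each with
# 4 box offsets followed by 20 class logits.
print(outputs.shape)  # (2, 49104, 24)
```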
