import tensorflow as tf

from .metrics import bbox_iou
from .utils import swap_xy, convert_to_xywh


def random_flip_horizontal(image, boxes):
    """Flips the image and its boxes horizontally with a 50% chance.

    `boxes` are expected in normalized `[xmin, ymin, xmax, ymax]` format.
    """
    if tf.random.uniform(()) > 0.5:
        image = tf.image.flip_left_right(image)
        boxes = tf.stack([1 - boxes[:, 2], boxes[:, 1], 1 - boxes[:, 0], boxes[:, 3]], axis=-1)
    return image, boxes


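# A quick worked example of the flip above (illustrative values only): a
# normalized box [xmin=0.2, ymin=0.3, xmax=0.5, ymax=0.7] becomes
# [1 - 0.5, 0.3, 1 - 0.2, 0.7] = [0.5, 0.3, 0.8, 0.7], so the box still covers
# the same object in the mirrored image.

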
def resize_and_pad_image(image, min_side=800.0, max_side=1333.0, jitter=(640, 1024), stride=128.0):
    """Resizes and pads an image while preserving its aspect ratio.

    The smaller side is resized to `min_side` (or to a value sampled uniformly
    from `jitter` when jitter is enabled). If the larger side then exceeds
    `max_side`, the image is rescaled so that the larger side equals `max_side`.
    Finally the image is padded with zeros on the right and bottom so that both
    dimensions are multiples of `stride`.

    Returns:
      image: The resized and padded image.
      image_shape: The shape of the image before padding.
      ratio: The scaling factor applied to the original image.
    """
    image_shape = tf.cast(tf.shape(image)[:2], dtype=tf.float32)
    if jitter is not None:
        min_side = tf.random.uniform((), jitter[0], jitter[1], dtype=tf.float32)
    ratio = min_side / tf.reduce_min(image_shape)
    if ratio * tf.reduce_max(image_shape) > max_side:
        ratio = max_side / tf.reduce_max(image_shape)
    image_shape = ratio * image_shape
    image = tf.image.resize(image, tf.cast(image_shape, dtype=tf.int32))
    padded_image_shape = tf.cast(
        tf.math.ceil(image_shape / stride) * stride, dtype=tf.int32
    )
    image = tf.image.pad_to_bounding_box(
        image, 0, 0, padded_image_shape[0], padded_image_shape[1]
    )
    return image, image_shape, ratio


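# A minimal usage sketch (illustrative, not part of the training pipeline): for
# inference you would typically disable jitter and keep the returned ratio so
# predicted boxes can be mapped back to the original image, e.g.:
#
#     image, _, ratio = resize_and_pad_image(raw_image, jitter=None)
#     image = tf.expand_dims(image, axis=0)  # add a batch dimension
#     # ... run the detector, then divide predicted box coordinates by `ratio`
#     # to express them in the original image's pixel space.

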
def preprocess_data(sample):
    """Applies preprocessing to a single sample.

    Swaps the box coordinates to `[xmin, ymin, xmax, ymax]`, applies a random
    horizontal flip, resizes and pads the image, and converts the boxes to the
    absolute `[x, y, width, height]` format expected by the label encoder.

    Returns:
      image: The resized and padded image.
      bbox: Bounding boxes in `[x, y, width, height]` format.
      class_id: The class ids of the objects.
    """
    image = sample["image"]
    bbox = swap_xy(sample["objects"]["bbox"])
    class_id = tf.cast(sample["objects"]["label"], dtype=tf.int32)

    image, bbox = random_flip_horizontal(image, bbox)
    image, image_shape, _ = resize_and_pad_image(image)

    bbox = tf.stack(
        [
            bbox[:, 0] * image_shape[1],
            bbox[:, 1] * image_shape[0],
            bbox[:, 2] * image_shape[1],
            bbox[:, 3] * image_shape[0],
        ],
        axis=-1,
    )
    bbox = convert_to_xywh(bbox)
    return image, bbox, class_id


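# A rough sketch of how this preprocessing might be wired into a tf.data
# pipeline. The dataset choice ("coco/2017") and split names are assumptions,
# not part of this module:
#
#     import tensorflow_datasets as tfds
#
#     (train_ds, val_ds), info = tfds.load(
#         "coco/2017", split=["train", "validation"], with_info=True
#     )
#     train_ds = train_ds.map(preprocess_data, num_parallel_calls=tf.data.AUTOTUNE)

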
class AnchorBox:
    """Generates anchor boxes.

    This class has operations to generate anchor boxes for feature maps at
    strides `[8, 16, 32, 64, 128]`, where each anchor box is of the format
    `[x, y, width, height]`.

    Attributes:
      aspect_ratios: A list of float values representing the aspect ratios of
        the anchor boxes at each location on the feature map.
      scales: A list of float values representing the scale of the anchor boxes
        at each location on the feature map.
      num_anchors: The number of anchor boxes at each location on the feature map.
      areas: A list of float values representing the areas of the anchor
        boxes for each feature map in the feature pyramid.
      strides: A list of float values representing the strides for each feature
        map in the feature pyramid.
    """

    def __init__(self):
        self.aspect_ratios = [0.5, 1.0, 2.0]
        self.scales = [2 ** x for x in [0, 1 / 3, 2 / 3]]

        self._num_anchors = len(self.aspect_ratios) * len(self.scales)
        self._strides = [2 ** i for i in range(3, 8)]
        self._areas = [x ** 2 for x in [32.0, 64.0, 128.0, 256.0, 512.0]]
        self._anchor_dims = self._compute_dims()

    def _compute_dims(self):
        """Computes anchor box dimensions for all ratios and scales at all levels."""
        anchor_dims_all = []
        for area in self._areas:
            anchor_dims = []
            for ratio in self.aspect_ratios:
                anchor_height = tf.math.sqrt(area / ratio)
                anchor_width = area / anchor_height
                dims = tf.convert_to_tensor([[[anchor_width, anchor_height]]])

                for scale in self.scales:
                    anchor_dims.append(scale * dims)
            anchor_dims = tf.stack(anchor_dims, axis=-2)
            anchor_dims_all.append(anchor_dims)
        return anchor_dims_all

    def _get_anchors(self, feature_height, feature_width, level):
        """Generates anchor boxes for a given feature map size and level.

        Arguments:
          feature_height: An integer representing the height of the feature map.
          feature_width: An integer representing the width of the feature map.
          level: An integer representing the level of the feature map in the
            feature pyramid.

        Returns:
          anchor boxes with the shape
          `(feature_height * feature_width * num_anchors, 4)`
        """
        rx = tf.range(feature_width, dtype=tf.float32) + 0.5
        ry = tf.range(feature_height, dtype=tf.float32) + 0.5
        centers = tf.stack(tf.meshgrid(rx, ry), axis=-1) * self._strides[level - 3]
        centers = tf.expand_dims(centers, axis=-2)
        centers = tf.tile(centers, [1, 1, self._num_anchors, 1])
        dims = tf.tile(
            self._anchor_dims[level - 3], [feature_height, feature_width, 1, 1]
        )
        anchors = tf.concat([centers, dims], axis=-1)
        return tf.reshape(
            anchors, [feature_height * feature_width * self._num_anchors, 4]
        )

    def get_anchors(self, image_height, image_width):
        """Generates anchor boxes for all the feature maps of the feature pyramid.

        Arguments:
          image_height: Height of the input image.
          image_width: Width of the input image.

        Returns:
          anchor boxes for all the feature maps, stacked as a single tensor
          with shape `(total_anchors, 4)`
        """
        anchors = [
            self._get_anchors(
                tf.math.ceil(image_height / 2 ** i),
                tf.math.ceil(image_width / 2 ** i),
                i,
            )
            for i in range(3, 8)
        ]
        return tf.concat(anchors, axis=0)


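# A quick sanity check of the anchor count (illustrative): for a 640x640 input,
# the feature maps at strides 8, 16, 32, 64 and 128 have spatial sizes 80x80,
# 40x40, 20x20, 10x10 and 5x5. With 9 anchors per location this gives
# (6400 + 1600 + 400 + 100 + 25) * 9 = 76725 anchors in total, e.g.:
#
#     anchors = AnchorBox().get_anchors(640, 640)
#     # anchors.shape == (76725, 4)

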
class LabelEncoder:
    """Transforms the raw labels into targets for training.

    This class has operations to generate targets for a batch of samples, which
    is made up of the input images, the bounding boxes for the objects present,
    and their class ids.
    """

    def __init__(self, preprocessing_fn):
        self.preprocessing_fn = preprocessing_fn

        self._anchor_box = AnchorBox()

        # The scaling factors used to scale the bounding box targets.
        self._box_variance = tf.convert_to_tensor(
            [0.1, 0.1, 0.2, 0.2], dtype=tf.float32
        )

    def _match_anchor_boxes(
        self, anchor_boxes, gt_boxes, match_iou=0.5, ignore_iou=0.4
    ):
        """Matches ground truth boxes to anchor boxes based on IOU.

        1. Calculates the pairwise IOU for the M `anchor_boxes` and N `gt_boxes`
           to get a `(M, N)` shaped matrix.
        2. The ground truth box with the maximum IOU in each row is assigned to
           the anchor box, provided the IOU is greater than or equal to `match_iou`.
        3. If the maximum IOU in a row is less than `ignore_iou`, the anchor
           box is assigned the background class.
        4. The remaining anchor boxes that do not have any class assigned are
           ignored during training.

        Arguments:
          anchor_boxes: A float tensor with shape `(total_anchors, 4)`, where
            each box is of the format `[x, y, width, height]`.
          gt_boxes: A float tensor with shape `(num_objects, 4)`, where each
            box is of the format `[x, y, width, height]`.
          match_iou: A float value representing the minimum IOU threshold for
            determining if a ground truth box can be assigned to an anchor box.
          ignore_iou: A float value representing the IOU threshold under which
            an anchor box is assigned to the background class.

        Returns:
          matched_gt_idx: Index of the matched object.
          positive_mask: A mask for anchor boxes that have been assigned ground
            truth boxes.
          ignore_mask: A mask for anchor boxes that need to be ignored during
            training.
        """
        iou_matrix = bbox_iou(anchor_boxes, gt_boxes)
        max_iou = tf.reduce_max(iou_matrix, axis=1)
        matched_gt_idx = tf.argmax(iou_matrix, axis=1)
        positive_mask = tf.greater_equal(max_iou, match_iou)
        negative_mask = tf.less(max_iou, ignore_iou)
        ignore_mask = tf.logical_not(tf.logical_or(positive_mask, negative_mask))
        return (
            matched_gt_idx,
            tf.cast(positive_mask, dtype=tf.float32),
            tf.cast(ignore_mask, dtype=tf.float32),
        )

    def _compute_box_target(self, anchor_boxes, matched_gt_boxes):
        """Transforms the ground truth boxes into targets for training"""
        box_target = tf.concat(
            [
                (matched_gt_boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:],
                tf.math.log(matched_gt_boxes[:, 2:] / anchor_boxes[:, 2:]),
            ],
            axis=-1,
        )
        box_target = box_target / self._box_variance
        return box_target

    def _encode_sample(self, image_shape, gt_boxes, cls_ids):
        """Creates box and classification targets for a single sample"""
        anchor_boxes = self._anchor_box.get_anchors(image_shape[1], image_shape[2])
        cls_ids = tf.cast(cls_ids, dtype=tf.float32)
        matched_gt_idx, positive_mask, ignore_mask = self._match_anchor_boxes(
            anchor_boxes, gt_boxes
        )
        matched_gt_boxes = tf.gather(gt_boxes, matched_gt_idx)
        box_target = self._compute_box_target(anchor_boxes, matched_gt_boxes)
        matched_gt_cls_ids = tf.gather(cls_ids, matched_gt_idx)
        cls_target = tf.where(
            tf.not_equal(positive_mask, 1.0), -1.0, matched_gt_cls_ids
        )
        cls_target = tf.where(tf.equal(ignore_mask, 1.0), -2.0, cls_target)
        cls_target = tf.expand_dims(cls_target, axis=-1)
        label = tf.concat([box_target, cls_target], axis=-1)
        return label

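    # To make the class-target encoding above concrete (illustrative values): an
    # anchor whose best IOU with any ground truth box is 0.7 keeps the matched
    # class id, one with a best IOU of 0.2 gets the background marker -1.0, and
    # one with a best IOU of 0.45 (between ignore_iou and match_iou) gets -2.0
    # so the loss can exclude it from training.
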
    def encode_batch(self, batch_images, gt_boxes, cls_ids):
        """Creates box and classification targets for a batch"""
        images_shape = tf.shape(batch_images)
        batch_size = images_shape[0]

        labels = tf.TensorArray(dtype=tf.float32, size=batch_size, dynamic_size=True)
        for i in range(batch_size):
            label = self._encode_sample(images_shape, gt_boxes[i], cls_ids[i])
            labels = labels.write(i, label)
        batch_images = self.preprocessing_fn(batch_images)
        return batch_images, labels.stack()
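

# A rough end-to-end sketch of how the encoder might be used (the preprocessing
# function, batch size, and padding values here are assumptions, not part of
# this module):
#
#     label_encoder = LabelEncoder(tf.keras.applications.resnet.preprocess_input)
#     train_ds = (
#         train_ds.map(preprocess_data, num_parallel_calls=tf.data.AUTOTUNE)
#         .padded_batch(batch_size=2, padding_values=(0.0, 1e-8, -1), drop_remainder=True)
#         .map(label_encoder.encode_batch, num_parallel_calls=tf.data.AUTOTUNE)
#         .prefetch(tf.data.AUTOTUNE)
#     )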