facebookresearch · Johnqczhang · Jun 17, 2019 · Jul 1, 2019 · Jul 1, 2019 · Jul 1, 2019
diff --git a/maskrcnn_benchmark/modeling/detector/generalized_rcnn.py b/maskrcnn_benchmark/modeling/detector/generalized_rcnn.py
@@ -49,10 +49,9 @@ def forward(self, images, targets=None):
         features = self.backbone(images.tensors)
         proposals, proposal_losses = self.rpn(images, features, targets)
         if self.roi_heads:
-            x, result, detector_losses = self.roi_heads(features, proposals, targets)
+            _, result, detector_losses = self.roi_heads(features, proposals, targets)
         else:
             # RPN-only models don't have roi_heads
-            x = features
             result = proposals
             detector_losses = {}
 

diff --git a/maskrcnn_benchmark/modeling/roi_heads/box_head/box_head.py b/maskrcnn_benchmark/modeling/roi_heads/box_head/box_head.py
@@ -1,6 +1,5 @@
 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 import torch
-from torch import nn
 
 from .roi_box_feature_extractors import make_roi_box_feature_extractor
 from .roi_box_predictors import make_roi_box_predictor

diff --git a/maskrcnn_benchmark/modeling/roi_heads/box_head/inference.py b/maskrcnn_benchmark/modeling/roi_heads/box_head/inference.py
@@ -60,7 +60,7 @@ def forward(self, x, boxes):
         # TODO think about a representation of batch of boxes
         image_shapes = [box.size for box in boxes]
         boxes_per_image = [len(box) for box in boxes]
-        concat_boxes = torch.cat([a.bbox for a in boxes], dim=0)
+        concat_boxes = torch.cat([box.bbox for box in boxes], dim=0)
 
         if self.cls_agnostic_bbox_reg:
             box_regression = box_regression[:, -4:]
@@ -150,8 +150,6 @@ def filter_results(self, boxlist, num_classes):
 
 
 def make_roi_box_post_processor(cfg):
-    use_fpn = cfg.MODEL.ROI_HEADS.USE_FPN
-
     bbox_reg_weights = cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS
     box_coder = BoxCoder(weights=bbox_reg_weights)
 

diff --git a/maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_feature_extractors.py b/maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_feature_extractors.py
@@ -1,5 +1,4 @@
 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-import torch
 from torch import nn
 from torch.nn import functional as F
 
@@ -74,10 +73,8 @@ def __init__(self, cfg, in_channels):
     def forward(self, x, proposals):
         x = self.pooler(x, proposals)
         x = x.view(x.size(0), -1)
-
         x = F.relu(self.fc6(x))
         x = F.relu(self.fc7(x))
-
         return x
 
 
@@ -106,7 +103,7 @@ def __init__(self, cfg, in_channels):
         dilation = cfg.MODEL.ROI_BOX_HEAD.DILATION
 
         xconvs = []
-        for ix in range(num_stacked_convs):
+        for _ in range(num_stacked_convs):
             xconvs.append(
                 nn.Conv2d(
                     in_channels,
@@ -127,9 +124,9 @@ def __init__(self, cfg, in_channels):
         for modules in [self.xconvs,]:
             for l in modules.modules():
                 if isinstance(l, nn.Conv2d):
-                    torch.nn.init.normal_(l.weight, std=0.01)
+                    nn.init.normal_(l.weight, std=0.01)
                     if not use_gn:
-                        torch.nn.init.constant_(l.bias, 0)
+                        nn.init.constant_(l.bias, 0)
 
         input_size = conv_head_dim * resolution ** 2
         representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM

diff --git a/maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py b/maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py
@@ -9,19 +9,16 @@ def __init__(self, config, in_channels):
         super(FastRCNNPredictor, self).__init__()
         assert in_channels is not None
 
-        num_inputs = in_channels
-
         num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES
         self.avgpool = nn.AdaptiveAvgPool2d(1)
-        self.cls_score = nn.Linear(num_inputs, num_classes)
+        self.cls_score = nn.Linear(in_channels, num_classes)
         num_bbox_reg_classes = 2 if config.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes
-        self.bbox_pred = nn.Linear(num_inputs, num_bbox_reg_classes * 4)
+        self.bbox_pred = nn.Linear(in_channels, num_bbox_reg_classes * 4)
 
         nn.init.normal_(self.cls_score.weight, mean=0, std=0.01)
-        nn.init.constant_(self.cls_score.bias, 0)
-
         nn.init.normal_(self.bbox_pred.weight, mean=0, std=0.001)
-        nn.init.constant_(self.bbox_pred.bias, 0)
+        for l in [self.cls_score, self.bbox_pred]:
+            nn.init.constant_(l.bias, 0)
 
     def forward(self, x):
         x = self.avgpool(x)
@@ -36,11 +33,10 @@ class FPNPredictor(nn.Module):
     def __init__(self, cfg, in_channels):
         super(FPNPredictor, self).__init__()
         num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
-        representation_size = in_channels
 
-        self.cls_score = nn.Linear(representation_size, num_classes)
+        self.cls_score = nn.Linear(in_channels, num_classes)
         num_bbox_reg_classes = 2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes
-        self.bbox_pred = nn.Linear(representation_size, num_bbox_reg_classes * 4)
+        self.bbox_pred = nn.Linear(in_channels, num_bbox_reg_classes * 4)
 
         nn.init.normal_(self.cls_score.weight, std=0.01)
         nn.init.normal_(self.bbox_pred.weight, std=0.001)

diff --git a/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/keypoint_head.py b/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/keypoint_head.py
@@ -9,7 +9,6 @@
 class ROIKeypointHead(torch.nn.Module):
     def __init__(self, cfg, in_channels):
         super(ROIKeypointHead, self).__init__()
-        self.cfg = cfg.clone()
         self.feature_extractor = make_roi_keypoint_feature_extractor(cfg, in_channels)
         self.predictor = make_roi_keypoint_predictor(
             cfg, self.feature_extractor.out_channels)
@@ -27,10 +26,11 @@ def forward(self, features, proposals, targets=None):
             x (Tensor): the result of the feature extractor
             proposals (list[BoxList]): during training, the original proposals
                 are returned. During testing, the predicted boxlists are returned
-                with the `mask` field set
+                with the `keypoint` field set
             losses (dict[Tensor]): During training, returns the losses for the
                 head. During testing, returns an empty dict.
         """
+
         if self.training:
             with torch.no_grad():
                 proposals = self.loss_evaluator.subsample(proposals, targets)

diff --git a/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_predictors.py b/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_predictors.py
@@ -1,18 +1,18 @@
 from torch import nn
 
-from maskrcnn_benchmark import layers
+from maskrcnn_benchmark.layers import ConvTranspose2d
+from maskrcnn_benchmark.layers import interpolate
 from maskrcnn_benchmark.modeling import registry
 
 
 @registry.ROI_KEYPOINT_PREDICTOR.register("KeypointRCNNPredictor")
 class KeypointRCNNPredictor(nn.Module):
     def __init__(self, cfg, in_channels):
         super(KeypointRCNNPredictor, self).__init__()
-        input_features = in_channels
         num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES
         deconv_kernel = 4
-        self.kps_score_lowres = layers.ConvTranspose2d(
-            input_features,
+        self.kps_score_lowres = ConvTranspose2d(
+            in_channels,
             num_keypoints,
             deconv_kernel,
             stride=2,
@@ -27,7 +27,7 @@ def __init__(self, cfg, in_channels):
 
     def forward(self, x):
         x = self.kps_score_lowres(x)
-        x = layers.interpolate(
+        x = interpolate(
             x, scale_factor=self.up_scale, mode="bilinear", align_corners=False
         )
         return x

diff --git a/maskrcnn_benchmark/modeling/roi_heads/mask_head/inference.py b/maskrcnn_benchmark/modeling/roi_heads/mask_head/inference.py
@@ -3,7 +3,6 @@
 import torch
 from torch import nn
 from maskrcnn_benchmark.layers.misc import interpolate
-
 from maskrcnn_benchmark.structures.bounding_box import BoxList
 
 
@@ -70,7 +69,6 @@ class MaskPostProcessorCOCOFormat(MaskPostProcessor):
 
     def forward(self, x, boxes):
         import pycocotools.mask as mask_util
-        import numpy as np
 
         results = super(MaskPostProcessorCOCOFormat, self).forward(x, boxes)
         for result in results:
@@ -87,7 +85,7 @@ def forward(self, x, boxes):
 
 # the next two functions should be merged inside Masker
 # but are kept here for the moment while we need them
-# temporarily gor paste_mask_in_image
+# temporarily for paste_mask_in_image
 def expand_boxes(boxes, scale):
     w_half = (boxes[:, 2] - boxes[:, 0]) * .5
     h_half = (boxes[:, 3] - boxes[:, 1]) * .5
@@ -127,10 +125,8 @@ def paste_mask_in_image(mask, box, im_h, im_w, thresh=0.5, padding=1):
     box = box.to(dtype=torch.int32)
 
     TO_REMOVE = 1
-    w = int(box[2] - box[0] + TO_REMOVE)
-    h = int(box[3] - box[1] + TO_REMOVE)
-    w = max(w, 1)
-    h = max(h, 1)
+    w = max(int(box[2] - box[0] + TO_REMOVE), 1)
+    h = max(int(box[3] - box[1] + TO_REMOVE), 1)
 
     # Set shape to [batchxCxHxW]
     mask = mask.expand((1, 1, -1, -1))

diff --git a/maskrcnn_benchmark/modeling/roi_heads/mask_head/loss.py b/maskrcnn_benchmark/modeling/roi_heads/mask_head/loss.py
@@ -19,6 +19,7 @@ def project_masks_on_boxes(segmentation_masks, proposals, discretization_size):
     Arguments:
         segmentation_masks: an instance of SegmentationMask
         proposals: an instance of BoxList
+        discretization_size: side length M of the M x M grid each cropped mask is resized to
     """
     masks = []
     M = discretization_size
@@ -33,9 +34,9 @@ def project_masks_on_boxes(segmentation_masks, proposals, discretization_size):
     for segmentation_mask, proposal in zip(segmentation_masks, proposals):
         # crop the masks, resize them to the desired resolution and
         # then convert them to the tensor representation.
-        cropped_mask = segmentation_mask.crop(proposal)
-        scaled_mask = cropped_mask.resize((M, M))
-        mask = scaled_mask.get_mask_tensor()
+        mask = segmentation_mask.crop(proposal)
+        mask = mask.resize((M, M))
+        mask = mask.get_mask_tensor()
         masks.append(mask)
     if len(masks) == 0:
         return torch.empty(0, dtype=torch.float32, device=device)

diff --git a/maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py b/maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py
@@ -1,6 +1,5 @@
 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 import torch
-from torch import nn
 
 from maskrcnn_benchmark.structures.bounding_box import BoxList
 
@@ -21,9 +20,9 @@ def keep_only_positive_boxes(boxes):
     assert isinstance(boxes, (list, tuple))
     assert isinstance(boxes[0], BoxList)
     assert boxes[0].has_field("labels")
+
     positive_boxes = []
     positive_inds = []
-    num_boxes = 0
     for boxes_per_image in boxes:
         labels = boxes_per_image.get_field("labels")
         inds_mask = labels > 0

diff --git a/maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_feature_extractors.py b/maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_feature_extractors.py
@@ -16,16 +16,10 @@
 @registry.ROI_MASK_FEATURE_EXTRACTORS.register("MaskRCNNFPNFeatureExtractor")
 class MaskRCNNFPNFeatureExtractor(nn.Module):
     """
-    Heads for FPN for classification
+    Feature extractor of the mask (segmentation) head for FPN
     """
 
     def __init__(self, cfg, in_channels):
-        """
-        Arguments:
-            num_classes (int): number of output classes
-            input_size (int): number of channels of the input once it's flattened
-            representation_size (int): size of the intermediate representation
-        """
         super(MaskRCNNFPNFeatureExtractor, self).__init__()
 
         resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
@@ -36,14 +30,13 @@ def __init__(self, cfg, in_channels):
             scales=scales,
             sampling_ratio=sampling_ratio,
         )
-        input_size = in_channels
         self.pooler = pooler
 
         use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
         layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
         dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION
 
-        next_feature = input_size
+        next_feature = in_channels
         self.blocks = []
         for layer_idx, layer_features in enumerate(layers, 1):
             layer_name = "mask_fcn{}".format(layer_idx)
@@ -58,7 +51,6 @@ def __init__(self, cfg, in_channels):
 
     def forward(self, x, proposals):
         x = self.pooler(x, proposals)
-
         for layer_name in self.blocks:
             x = F.relu(getattr(self, layer_name)(x))
 

diff --git a/maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_predictors.py b/maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_predictors.py
@@ -11,11 +11,10 @@
 class MaskRCNNC4Predictor(nn.Module):
     def __init__(self, cfg, in_channels):
         super(MaskRCNNC4Predictor, self).__init__()
+
         num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
         dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]
-        num_inputs = in_channels
-
-        self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0)
+        self.conv5_mask = ConvTranspose2d(in_channels, dim_reduced, 2, 2, 0)
         self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)
 
         for name, param in self.named_parameters():
@@ -35,10 +34,9 @@ def forward(self, x):
 class MaskRCNNConv1x1Predictor(nn.Module):
     def __init__(self, cfg, in_channels):
         super(MaskRCNNConv1x1Predictor, self).__init__()
-        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
-        num_inputs = in_channels
 
-        self.mask_fcn_logits = Conv2d(num_inputs, num_classes, 1, 1, 0)
+        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
+        self.mask_fcn_logits = Conv2d(in_channels, num_classes, 1, 1, 0)
 
         for name, param in self.named_parameters():
             if "bias" in name:

diff --git a/maskrcnn_benchmark/modeling/roi_heads/roi_heads.py b/maskrcnn_benchmark/modeling/roi_heads/roi_heads.py
@@ -16,14 +16,15 @@ def __init__(self, cfg, heads):
         super(CombinedROIHeads, self).__init__(heads)
         self.cfg = cfg.clone()
         if cfg.MODEL.MASK_ON and cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR:
+            assert cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION == cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
             self.mask.feature_extractor = self.box.feature_extractor
         if cfg.MODEL.KEYPOINT_ON and cfg.MODEL.ROI_KEYPOINT_HEAD.SHARE_BOX_FEATURE_EXTRACTOR:
+            assert cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION == cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
             self.keypoint.feature_extractor = self.box.feature_extractor
 
     def forward(self, features, proposals, targets=None):
         losses = {}
-        # TODO rename x to roi_box_features, if it doesn't increase memory consumption
-        x, detections, loss_box = self.box(features, proposals, targets)
+        box_features, detections, loss_box = self.box(features, proposals, targets)
         losses.update(loss_box)
         if self.cfg.MODEL.MASK_ON:
             mask_features = features
@@ -33,26 +34,27 @@ def forward(self, features, proposals, targets=None):
                 self.training
                 and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR
             ):
-                mask_features = x
-            # During training, self.box() will return the unaltered proposals as "detections"
+                mask_features = box_features
+            # During training, self.mask() will return the unaltered proposals as "detections"
             # this makes the API consistent during training and testing
-            x, detections, loss_mask = self.mask(mask_features, detections, targets)
+            _, detections, loss_mask = self.mask(mask_features, detections, targets)
             losses.update(loss_mask)
 
         if self.cfg.MODEL.KEYPOINT_ON:
             keypoint_features = features
             # optimization: during training, if we share the feature extractor between
-            # the box and the mask heads, then we can reuse the features already computed
+            # the box and the keypoint heads, then we can reuse the features already computed
             if (
                 self.training
                 and self.cfg.MODEL.ROI_KEYPOINT_HEAD.SHARE_BOX_FEATURE_EXTRACTOR
             ):
-                keypoint_features = x
-            # During training, self.box() will return the unaltered proposals as "detections"
+                keypoint_features = box_features
+            # During training, self.keypoint() will return the unaltered proposals as "detections"
             # this makes the API consistent during training and testing
-            x, detections, loss_keypoint = self.keypoint(keypoint_features, detections, targets)
+            _, detections, loss_keypoint = self.keypoint(keypoint_features, detections, targets)
             losses.update(loss_keypoint)
-        return x, detections, losses
+
+        return box_features, detections, losses
 
 
 def build_roi_heads(cfg, in_channels):