Skip to content
This repository was archived by the owner on Oct 31, 2023. It is now read-only.
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions maskrcnn_benchmark/modeling/detector/generalized_rcnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,9 @@ def forward(self, images, targets=None):
features = self.backbone(images.tensors)
proposals, proposal_losses = self.rpn(images, features, targets)
if self.roi_heads:
x, result, detector_losses = self.roi_heads(features, proposals, targets)
_, result, detector_losses = self.roi_heads(features, proposals, targets)
else:
# RPN-only models don't have roi_heads
x = features
result = proposals
detector_losses = {}

Expand Down
1 change: 0 additions & 1 deletion maskrcnn_benchmark/modeling/roi_heads/box_head/box_head.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import torch
from torch import nn

from .roi_box_feature_extractors import make_roi_box_feature_extractor
from .roi_box_predictors import make_roi_box_predictor
Expand Down
4 changes: 1 addition & 3 deletions maskrcnn_benchmark/modeling/roi_heads/box_head/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def forward(self, x, boxes):
# TODO: think about a representation for a batch of boxes
image_shapes = [box.size for box in boxes]
boxes_per_image = [len(box) for box in boxes]
concat_boxes = torch.cat([a.bbox for a in boxes], dim=0)
concat_boxes = torch.cat([box.bbox for box in boxes], dim=0)

if self.cls_agnostic_bbox_reg:
box_regression = box_regression[:, -4:]
Expand Down Expand Up @@ -150,8 +150,6 @@ def filter_results(self, boxlist, num_classes):


def make_roi_box_post_processor(cfg):
use_fpn = cfg.MODEL.ROI_HEADS.USE_FPN

bbox_reg_weights = cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS
box_coder = BoxCoder(weights=bbox_reg_weights)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import torch
from torch import nn
from torch.nn import functional as F

Expand Down Expand Up @@ -74,10 +73,8 @@ def __init__(self, cfg, in_channels):
def forward(self, x, proposals):
x = self.pooler(x, proposals)
x = x.view(x.size(0), -1)

x = F.relu(self.fc6(x))
x = F.relu(self.fc7(x))

return x


Expand Down Expand Up @@ -106,7 +103,7 @@ def __init__(self, cfg, in_channels):
dilation = cfg.MODEL.ROI_BOX_HEAD.DILATION

xconvs = []
for ix in range(num_stacked_convs):
for _ in range(num_stacked_convs):
xconvs.append(
nn.Conv2d(
in_channels,
Expand All @@ -127,9 +124,9 @@ def __init__(self, cfg, in_channels):
for modules in [self.xconvs,]:
for l in modules.modules():
if isinstance(l, nn.Conv2d):
torch.nn.init.normal_(l.weight, std=0.01)
nn.init.normal_(l.weight, std=0.01)
if not use_gn:
torch.nn.init.constant_(l.bias, 0)
nn.init.constant_(l.bias, 0)

input_size = conv_head_dim * resolution ** 2
representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,16 @@ def __init__(self, config, in_channels):
super(FastRCNNPredictor, self).__init__()
assert in_channels is not None

num_inputs = in_channels

num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES
self.avgpool = nn.AdaptiveAvgPool2d(1)
self.cls_score = nn.Linear(num_inputs, num_classes)
self.cls_score = nn.Linear(in_channels, num_classes)
num_bbox_reg_classes = 2 if config.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes
self.bbox_pred = nn.Linear(num_inputs, num_bbox_reg_classes * 4)
self.bbox_pred = nn.Linear(in_channels, num_bbox_reg_classes * 4)

nn.init.normal_(self.cls_score.weight, mean=0, std=0.01)
nn.init.constant_(self.cls_score.bias, 0)

nn.init.normal_(self.bbox_pred.weight, mean=0, std=0.001)
nn.init.constant_(self.bbox_pred.bias, 0)
for l in [self.cls_score, self.bbox_pred]:
nn.init.constant_(l.bias, 0)

def forward(self, x):
x = self.avgpool(x)
Expand All @@ -36,11 +33,10 @@ class FPNPredictor(nn.Module):
def __init__(self, cfg, in_channels):
super(FPNPredictor, self).__init__()
num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
representation_size = in_channels

self.cls_score = nn.Linear(representation_size, num_classes)
self.cls_score = nn.Linear(in_channels, num_classes)
num_bbox_reg_classes = 2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes
self.bbox_pred = nn.Linear(representation_size, num_bbox_reg_classes * 4)
self.bbox_pred = nn.Linear(in_channels, num_bbox_reg_classes * 4)

nn.init.normal_(self.cls_score.weight, std=0.01)
nn.init.normal_(self.bbox_pred.weight, std=0.001)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
class ROIKeypointHead(torch.nn.Module):
def __init__(self, cfg, in_channels):
super(ROIKeypointHead, self).__init__()
self.cfg = cfg.clone()
self.feature_extractor = make_roi_keypoint_feature_extractor(cfg, in_channels)
self.predictor = make_roi_keypoint_predictor(
cfg, self.feature_extractor.out_channels)
Expand All @@ -27,10 +26,11 @@ def forward(self, features, proposals, targets=None):
x (Tensor): the result of the feature extractor
proposals (list[BoxList]): during training, the original proposals
are returned. During testing, the predicted boxlists are returned
with the `mask` field set
with the `keypoint` field set
losses (dict[Tensor]): During training, returns the losses for the
head. During testing, returns an empty dict.
"""

if self.training:
with torch.no_grad():
proposals = self.loss_evaluator.subsample(proposals, targets)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
from torch import nn

from maskrcnn_benchmark import layers
from maskrcnn_benchmark.layers import ConvTranspose2d
from maskrcnn_benchmark.layers import interpolate
from maskrcnn_benchmark.modeling import registry


@registry.ROI_KEYPOINT_PREDICTOR.register("KeypointRCNNPredictor")
class KeypointRCNNPredictor(nn.Module):
def __init__(self, cfg, in_channels):
super(KeypointRCNNPredictor, self).__init__()
input_features = in_channels
num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES
deconv_kernel = 4
self.kps_score_lowres = layers.ConvTranspose2d(
input_features,
self.kps_score_lowres = ConvTranspose2d(
in_channels,
num_keypoints,
deconv_kernel,
stride=2,
Expand All @@ -27,7 +27,7 @@ def __init__(self, cfg, in_channels):

def forward(self, x):
x = self.kps_score_lowres(x)
x = layers.interpolate(
x = interpolate(
x, scale_factor=self.up_scale, mode="bilinear", align_corners=False
)
return x
Expand Down
10 changes: 3 additions & 7 deletions maskrcnn_benchmark/modeling/roi_heads/mask_head/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import torch
from torch import nn
from maskrcnn_benchmark.layers.misc import interpolate

from maskrcnn_benchmark.structures.bounding_box import BoxList


Expand Down Expand Up @@ -70,7 +69,6 @@ class MaskPostProcessorCOCOFormat(MaskPostProcessor):

def forward(self, x, boxes):
import pycocotools.mask as mask_util
import numpy as np

results = super(MaskPostProcessorCOCOFormat, self).forward(x, boxes)
for result in results:
Expand All @@ -87,7 +85,7 @@ def forward(self, x, boxes):

# the next two functions should be merged inside Masker
# but are kept here for the moment while we need them
# temporarily gor paste_mask_in_image
# temporarily for paste_mask_in_image
def expand_boxes(boxes, scale):
w_half = (boxes[:, 2] - boxes[:, 0]) * .5
h_half = (boxes[:, 3] - boxes[:, 1]) * .5
Expand Down Expand Up @@ -127,10 +125,8 @@ def paste_mask_in_image(mask, box, im_h, im_w, thresh=0.5, padding=1):
box = box.to(dtype=torch.int32)

TO_REMOVE = 1
w = int(box[2] - box[0] + TO_REMOVE)
h = int(box[3] - box[1] + TO_REMOVE)
w = max(w, 1)
h = max(h, 1)
w = max(int(box[2] - box[0] + TO_REMOVE), 1)
h = max(int(box[3] - box[1] + TO_REMOVE), 1)

# Set shape to [batch x C x H x W] (here 1 x 1 x H x W)
mask = mask.expand((1, 1, -1, -1))
Expand Down
7 changes: 4 additions & 3 deletions maskrcnn_benchmark/modeling/roi_heads/mask_head/loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def project_masks_on_boxes(segmentation_masks, proposals, discretization_size):
Arguments:
segmentation_masks: an instance of SegmentationMask
proposals: an instance of BoxList
discretization_size: side length M of the square (M x M) grid that each cropped mask is resized to
"""
masks = []
M = discretization_size
Expand All @@ -33,9 +34,9 @@ def project_masks_on_boxes(segmentation_masks, proposals, discretization_size):
for segmentation_mask, proposal in zip(segmentation_masks, proposals):
# crop the masks, resize them to the desired resolution and
# then convert them to the tensor representation.
cropped_mask = segmentation_mask.crop(proposal)
scaled_mask = cropped_mask.resize((M, M))
mask = scaled_mask.get_mask_tensor()
mask = segmentation_mask.crop(proposal)
mask = mask.resize((M, M))
mask = mask.get_mask_tensor()
masks.append(mask)
if len(masks) == 0:
return torch.empty(0, dtype=torch.float32, device=device)
Expand Down
3 changes: 1 addition & 2 deletions maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import torch
from torch import nn

from maskrcnn_benchmark.structures.bounding_box import BoxList

Expand All @@ -21,9 +20,9 @@ def keep_only_positive_boxes(boxes):
assert isinstance(boxes, (list, tuple))
assert isinstance(boxes[0], BoxList)
assert boxes[0].has_field("labels")

positive_boxes = []
positive_inds = []
num_boxes = 0
for boxes_per_image in boxes:
labels = boxes_per_image.get_field("labels")
inds_mask = labels > 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,10 @@
@registry.ROI_MASK_FEATURE_EXTRACTORS.register("MaskRCNNFPNFeatureExtractor")
class MaskRCNNFPNFeatureExtractor(nn.Module):
"""
Heads for FPN for classification
Heads for FPN for segmentation
"""

def __init__(self, cfg, in_channels):
"""
Arguments:
num_classes (int): number of output classes
input_size (int): number of channels of the input once it's flattened
representation_size (int): size of the intermediate representation
"""
super(MaskRCNNFPNFeatureExtractor, self).__init__()

resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
Expand All @@ -36,14 +30,13 @@ def __init__(self, cfg, in_channels):
scales=scales,
sampling_ratio=sampling_ratio,
)
input_size = in_channels
self.pooler = pooler

use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION

next_feature = input_size
next_feature = in_channels
self.blocks = []
for layer_idx, layer_features in enumerate(layers, 1):
layer_name = "mask_fcn{}".format(layer_idx)
Expand All @@ -58,7 +51,6 @@ def __init__(self, cfg, in_channels):

def forward(self, x, proposals):
x = self.pooler(x, proposals)

for layer_name in self.blocks:
x = F.relu(getattr(self, layer_name)(x))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,10 @@
class MaskRCNNC4Predictor(nn.Module):
def __init__(self, cfg, in_channels):
super(MaskRCNNC4Predictor, self).__init__()

num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]
num_inputs = in_channels

self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0)
self.conv5_mask = ConvTranspose2d(in_channels, dim_reduced, 2, 2, 0)
self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)

for name, param in self.named_parameters():
Expand All @@ -35,10 +34,9 @@ def forward(self, x):
class MaskRCNNConv1x1Predictor(nn.Module):
def __init__(self, cfg, in_channels):
super(MaskRCNNConv1x1Predictor, self).__init__()
num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
num_inputs = in_channels

self.mask_fcn_logits = Conv2d(num_inputs, num_classes, 1, 1, 0)
num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
self.mask_fcn_logits = Conv2d(in_channels, num_classes, 1, 1, 0)

for name, param in self.named_parameters():
if "bias" in name:
Expand Down
22 changes: 12 additions & 10 deletions maskrcnn_benchmark/modeling/roi_heads/roi_heads.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,15 @@ def __init__(self, cfg, heads):
super(CombinedROIHeads, self).__init__(heads)
self.cfg = cfg.clone()
if cfg.MODEL.MASK_ON and cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR:
assert cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION == cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
self.mask.feature_extractor = self.box.feature_extractor
if cfg.MODEL.KEYPOINT_ON and cfg.MODEL.ROI_KEYPOINT_HEAD.SHARE_BOX_FEATURE_EXTRACTOR:
assert cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION == cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
self.keypoint.feature_extractor = self.box.feature_extractor

def forward(self, features, proposals, targets=None):
losses = {}
# TODO rename x to roi_box_features, if it doesn't increase memory consumption
x, detections, loss_box = self.box(features, proposals, targets)
box_features, detections, loss_box = self.box(features, proposals, targets)
losses.update(loss_box)
if self.cfg.MODEL.MASK_ON:
mask_features = features
Expand All @@ -33,26 +34,27 @@ def forward(self, features, proposals, targets=None):
self.training
and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR
):
mask_features = x
# During training, self.box() will return the unaltered proposals as "detections"
mask_features = box_features
# During training, self.mask() will return the unaltered proposals as "detections"
# this makes the API consistent during training and testing
x, detections, loss_mask = self.mask(mask_features, detections, targets)
_, detections, loss_mask = self.mask(mask_features, detections, targets)
losses.update(loss_mask)

if self.cfg.MODEL.KEYPOINT_ON:
keypoint_features = features
# optimization: during training, if we share the feature extractor between
# the box and the mask heads, then we can reuse the features already computed
# the box and the keypoint heads, then we can reuse the features already computed
if (
self.training
and self.cfg.MODEL.ROI_KEYPOINT_HEAD.SHARE_BOX_FEATURE_EXTRACTOR
):
keypoint_features = x
# During training, self.box() will return the unaltered proposals as "detections"
keypoint_features = box_features
# During training, self.keypoint() will return the unaltered proposals as "detections"
# this makes the API consistent during training and testing
x, detections, loss_keypoint = self.keypoint(keypoint_features, detections, targets)
_, detections, loss_keypoint = self.keypoint(keypoint_features, detections, targets)
losses.update(loss_keypoint)
return x, detections, losses

return box_features, detections, losses


def build_roi_heads(cfg, in_channels):
Expand Down
Loading