14 changes: 7 additions & 7 deletions demo/predictor.py
@@ -10,6 +10,7 @@
from maskrcnn_benchmark import layers as L
from maskrcnn_benchmark.utils import cv2_util


class Resize(object):
def __init__(self, min_size, max_size):
self.min_size = min_size
@@ -42,6 +43,8 @@ def __call__(self, image):
size = self.get_size(image.size)
image = F.resize(image, size)
return image


class COCODemo(object):
# COCO categories for pretty print
CATEGORIES = [
@@ -197,9 +200,8 @@ def run_on_opencv_image(self, image):
image (np.ndarray): an image as returned by OpenCV

Returns:
prediction (BoxList): the detected objects. Additional information
of the detection properties can be found in the fields of
the BoxList via `prediction.fields()`
result (np.ndarray): an image with detected results
(boxes, masks, keypoints, etc) shown on top of it.
"""
predictions = self.compute_prediction(image)
top_predictions = self.select_top_predictions(predictions)
@@ -327,9 +329,7 @@ def overlay_mask(self, image, predictions):
)
image = cv2.drawContours(image, contours, -1, color, 3)

composite = image

return composite
return image

def overlay_keypoints(self, image, predictions):
keypoints = predictions.get_field("keypoints")
@@ -342,7 +342,7 @@ def overlay_keypoints(self, image, predictions):

def create_mask_montage(self, image, predictions):
"""
Create a montage showing the probability heatmaps for each one one of the
Create a montage showing the probability heatmaps for each one of the
detected objects

Arguments:
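For context, a rough usage sketch of the demo predictor touched above (not part of this diff; the config path, image path, and checkpoint availability are assumptions):

# Rough usage sketch for demo/predictor.py; paths are assumptions and a
# checkpoint reachable via cfg.MODEL.WEIGHT is expected to be available.
import cv2
from maskrcnn_benchmark.config import cfg
from predictor import COCODemo  # run from the demo/ directory

cfg.merge_from_file("../configs/e2e_mask_rcnn_R_50_FPN_1x.yaml")  # assumed config path
coco_demo = COCODemo(cfg, min_image_size=800, confidence_threshold=0.7)

image = cv2.imread("input.jpg")                # BGR image, as returned by OpenCV
result = coco_demo.run_on_opencv_image(image)  # np.ndarray with detections drawn on top
cv2.imwrite("result.jpg", result)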
4 changes: 2 additions & 2 deletions maskrcnn_benchmark/config/defaults.py
@@ -186,8 +186,8 @@
_C.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS = (10., 10., 5., 5.)
# RoI minibatch size *per image* (number of regions of interest [ROIs])
# Total number of RoIs per training minibatch =
# TRAIN.BATCH_SIZE_PER_IM * TRAIN.IMS_PER_BATCH
# E.g., a common configuration is: 512 * 2 * 8 = 8192
# MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE * SOLVER.IMS_PER_BATCH
# E.g., a common configuration is: 512 * 16 = 8192
_C.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
# Target fraction of RoI minibatch that is labeled foreground (i.e. class > 0)
_C.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.25
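For reference, the corrected arithmetic from the comment above, with an assumed 2-images-per-GPU, 8-GPU split:

# Total RoIs per training minibatch, per the corrected comment above.
batch_size_per_image = 512     # MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE
ims_per_batch = 2 * 8          # SOLVER.IMS_PER_BATCH (assumed: 2 images/GPU on 8 GPUs)
total_rois = batch_size_per_image * ims_per_batch
assert total_rois == 8192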
2 changes: 1 addition & 1 deletion maskrcnn_benchmark/data/build.py
@@ -123,7 +123,7 @@ def make_data_loader(cfg, is_train=True, is_distributed=False, start_iter=0):
), "TEST.IMS_PER_BATCH ({}) must be divisible by the number of GPUs ({}) used.".format(
images_per_batch, num_gpus)
images_per_gpu = images_per_batch // num_gpus
shuffle = False if not is_distributed else True
shuffle = True if is_distributed else False
num_iters = None
start_iter = 0

8 changes: 4 additions & 4 deletions maskrcnn_benchmark/data/datasets/coco.py
@@ -1,7 +1,7 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import torch
import torchvision

from torchvision.datasets.coco import CocoDetection
from maskrcnn_benchmark.structures.bounding_box import BoxList
from maskrcnn_benchmark.structures.segmentation_mask import SegmentationMask
from maskrcnn_benchmark.structures.keypoint import PersonKeypoints
@@ -25,18 +25,18 @@ def has_valid_annotation(anno):
# if all boxes have close to zero area, there is no annotation
if _has_only_empty_bbox(anno):
return False
# keypoints task have a slight different critera for considering
# keypoints task has a slight different criteria for considering
# if an annotation is valid
if "keypoints" not in anno[0]:
return True
# for keypoint detection tasks, only consider valid images those
# for keypoint detection task, only consider valid images those
# containing at least min_keypoints_per_image
if _count_visible_keypoints(anno) >= min_keypoints_per_image:
return True
return False


class COCODataset(torchvision.datasets.coco.CocoDetection):
class COCODataset(CocoDetection):
def __init__(
self, ann_file, root, remove_images_without_annotations, transforms=None
):
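A hypothetical construction of the dataset class above; the annotation file and image root are placeholder paths, not from this diff:

# Hypothetical usage of COCODataset as changed above; paths are placeholders.
from maskrcnn_benchmark.data.datasets.coco import COCODataset

dataset = COCODataset(
    ann_file="annotations/instances_train2017.json",  # placeholder
    root="train2017",                                  # placeholder
    remove_images_without_annotations=True,            # drops images failing has_valid_annotation
)
img, target, idx = dataset[0]  # expected: PIL image (no transforms given), BoxList target, index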
51 changes: 22 additions & 29 deletions maskrcnn_benchmark/data/datasets/evaluation/coco/coco_eval.py
@@ -4,6 +4,7 @@
import torch
from collections import OrderedDict
from tqdm import tqdm
from pycocotools.cocoeval import COCOeval

from maskrcnn_benchmark.modeling.roi_heads.mask_head.inference import Masker
from maskrcnn_benchmark.structures.bounding_box import BoxList
@@ -45,9 +46,9 @@ def do_coco_evaluation(
if "segm" in iou_types:
logger.info("Preparing segm results")
coco_results["segm"] = prepare_for_coco_segmentation(predictions, dataset)
if 'keypoints' in iou_types:
logger.info('Preparing keypoints results')
coco_results['keypoints'] = prepare_for_coco_keypoint(predictions, dataset)
if "keypoints" in iou_types:
logger.info("Preparing keypoints results")
coco_results["keypoints"] = prepare_for_coco_keypoint(predictions, dataset)

results = COCOResults(*iou_types)
logger.info("Evaluating predictions")
@@ -68,7 +69,6 @@


def prepare_for_coco_detection(predictions, dataset):
# assert isinstance(dataset, COCODataset)
coco_results = []
for image_id, prediction in enumerate(predictions):
original_id = dataset.id_to_img_map[image_id]
@@ -106,7 +106,6 @@ def prepare_for_coco_segmentation(predictions, dataset):
import numpy as np

masker = Masker(threshold=0.5, padding=1)
# assert isinstance(dataset, COCODataset)
coco_results = []
for image_id, prediction in tqdm(enumerate(predictions)):
original_id = dataset.id_to_img_map[image_id]
@@ -118,20 +117,15 @@
image_height = img_info["height"]
prediction = prediction.resize((image_width, image_height))
masks = prediction.get_field("mask")
# t = time.time()

# Masker is necessary only if masks haven't been already resized.
if list(masks.shape[-2:]) != [image_height, image_width]:
masks = masker(masks.expand(1, -1, -1, -1, -1), prediction)
masks = masks[0]
# logger.info('Time mask: {}'.format(time.time() - t))
# prediction = prediction.convert('xywh')

# boxes = prediction.bbox.tolist()
scores = prediction.get_field("scores").tolist()
labels = prediction.get_field("labels").tolist()

# rles = prediction.get_field('mask')

rles = [
mask_util.encode(np.array(mask[0, :, :, np.newaxis], order="F"))[0]
for mask in masks
@@ -156,33 +150,36 @@ def prepare_for_coco_segmentation(predictions, dataset):


def prepare_for_coco_keypoint(predictions, dataset):
# assert isinstance(dataset, COCODataset)
coco_results = []
for image_id, prediction in enumerate(predictions):
original_id = dataset.id_to_img_map[image_id]
if len(prediction.bbox) == 0:
continue

# TODO replace with get_img_info?
image_width = dataset.coco.imgs[original_id]['width']
image_height = dataset.coco.imgs[original_id]['height']
img_info = dataset.get_img_info(image_id)
image_width = img_info["width"]
image_height = img_info["height"]
prediction = prediction.resize((image_width, image_height))
prediction = prediction.convert('xywh')

boxes = prediction.bbox.tolist()
scores = prediction.get_field('scores').tolist()
labels = prediction.get_field('labels').tolist()
keypoints = prediction.get_field('keypoints')
scores = prediction.get_field("scores").tolist()
labels = prediction.get_field("labels").tolist()
keypoints = prediction.get_field("keypoints")
keypoints = keypoints.resize((image_width, image_height))
keypoints = keypoints.keypoints.view(keypoints.keypoints.shape[0], -1).tolist()

mapped_labels = [dataset.contiguous_category_id_to_json_id[i] for i in labels]

coco_results.extend([{
'image_id': original_id,
'category_id': mapped_labels[k],
'keypoints': keypoint,
'score': scores[k]} for k, keypoint in enumerate(keypoints)])
coco_results.extend(
[
{
"image_id": original_id,
"category_id": mapped_labels[k],
"keypoints": keypoint,
"score": scores[k]
}
for k, keypoint in enumerate(keypoints)
]
)
return coco_results

# inspired from Detectron
@@ -311,11 +308,8 @@ def evaluate_predictions_on_coco(
json.dump(coco_results, f)

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

coco_dt = coco_gt.loadRes(str(json_result_file)) if coco_results else COCO()

# coco_dt = coco_gt.loadRes(coco_results)
coco_eval = COCOeval(coco_gt, coco_dt, iou_type)
coco_eval.evaluate()
coco_eval.accumulate()
@@ -353,7 +347,6 @@ def __init__(self, *iou_types):
def update(self, coco_eval):
if coco_eval is None:
return
from pycocotools.cocoeval import COCOeval

assert isinstance(coco_eval, COCOeval)
s = coco_eval.stats
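For reference, a minimal sketch of the pycocotools flow exercised above; the file paths are placeholders:

# Minimal pycocotools evaluation flow (paths are placeholders).
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

coco_gt = COCO("annotations/instances_val2017.json")  # ground truth, placeholder path
coco_dt = coco_gt.loadRes("bbox_results.json")        # detections, placeholder path
coco_eval = COCOeval(coco_gt, coco_dt, "bbox")        # iou_type: "bbox", "segm", or "keypoints"
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
print(coco_eval.stats)  # 12 summary numbers (AP/AR over IoU thresholds, areas, maxDets)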
1 change: 0 additions & 1 deletion maskrcnn_benchmark/modeling/roi_heads/box_head/box_head.py
@@ -1,6 +1,5 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import torch
from torch import nn

from .roi_box_feature_extractors import make_roi_box_feature_extractor
from .roi_box_predictors import make_roi_box_predictor
6 changes: 2 additions & 4 deletions maskrcnn_benchmark/modeling/roi_heads/box_head/inference.py
@@ -48,7 +48,7 @@ def forward(self, x, boxes):
x (tuple[tensor, tensor]): x contains the class logits
and the box_regression from the model.
boxes (list[BoxList]): bounding boxes that are used as
reference, one for ech image
reference, one for each image

Returns:
results (list[BoxList]): one BoxList for each image, containing
@@ -60,7 +60,7 @@ def forward(self, x, boxes):
# TODO think about a representation of batch of boxes
image_shapes = [box.size for box in boxes]
boxes_per_image = [len(box) for box in boxes]
concat_boxes = torch.cat([a.bbox for a in boxes], dim=0)
concat_boxes = torch.cat([box.bbox for box in boxes], dim=0)

if self.cls_agnostic_bbox_reg:
box_regression = box_regression[:, -4:]
@@ -150,8 +150,6 @@ def filter_results(self, boxlist, num_classes):


def make_roi_box_post_processor(cfg):
use_fpn = cfg.MODEL.ROI_HEADS.USE_FPN

bbox_reg_weights = cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS
box_coder = BoxCoder(weights=bbox_reg_weights)

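A toy illustration (with assumed box counts and image size) of the per-image bookkeeping in forward() above:

# Per-image bookkeeping before box decoding; sizes are assumed for illustration.
import torch
from maskrcnn_benchmark.structures.bounding_box import BoxList

boxes = [
    BoxList(torch.rand(3, 4) * 100, (640, 480), mode="xyxy"),  # 3 proposals for image 1
    BoxList(torch.rand(5, 4) * 100, (640, 480), mode="xyxy"),  # 5 proposals for image 2
]
image_shapes = [box.size for box in boxes]                    # [(640, 480), (640, 480)]
boxes_per_image = [len(box) for box in boxes]                 # [3, 5]
concat_boxes = torch.cat([box.bbox for box in boxes], dim=0)  # shape (8, 4)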
@@ -1,5 +1,4 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import torch
from torch import nn
from torch.nn import functional as F

@@ -74,10 +73,8 @@ def __init__(self, cfg, in_channels):
def forward(self, x, proposals):
x = self.pooler(x, proposals)
x = x.view(x.size(0), -1)

x = F.relu(self.fc6(x))
x = F.relu(self.fc7(x))

return x


@@ -106,7 +103,7 @@ def __init__(self, cfg, in_channels):
dilation = cfg.MODEL.ROI_BOX_HEAD.DILATION

xconvs = []
for ix in range(num_stacked_convs):
for _ in range(num_stacked_convs):
xconvs.append(
nn.Conv2d(
in_channels,
@@ -127,9 +124,9 @@ def __init__(self, cfg, in_channels):
for modules in [self.xconvs,]:
for l in modules.modules():
if isinstance(l, nn.Conv2d):
torch.nn.init.normal_(l.weight, std=0.01)
nn.init.normal_(l.weight, std=0.01)
if not use_gn:
torch.nn.init.constant_(l.bias, 0)
nn.init.constant_(l.bias, 0)

input_size = conv_head_dim * resolution ** 2
representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM
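A standalone shape sketch, with assumed sizes, of the two-FC head pattern in forward() above:

# Two-FC box head: flatten pooled RoI features, then fc6 and fc7 with ReLU.
import torch
from torch import nn
from torch.nn import functional as F

in_channels, resolution, representation_size = 256, 7, 1024  # assumed values
fc6 = nn.Linear(in_channels * resolution ** 2, representation_size)
fc7 = nn.Linear(representation_size, representation_size)

x = torch.rand(8, in_channels, resolution, resolution)  # 8 pooled RoI feature maps
x = x.view(x.size(0), -1)                               # (8, 12544)
x = F.relu(fc6(x))
x = F.relu(fc7(x))                                      # (8, 1024)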
@@ -9,19 +9,16 @@ def __init__(self, config, in_channels):
super(FastRCNNPredictor, self).__init__()
assert in_channels is not None

num_inputs = in_channels

num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES
self.avgpool = nn.AdaptiveAvgPool2d(1)
self.cls_score = nn.Linear(num_inputs, num_classes)
self.cls_score = nn.Linear(in_channels, num_classes)
num_bbox_reg_classes = 2 if config.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes
self.bbox_pred = nn.Linear(num_inputs, num_bbox_reg_classes * 4)
self.bbox_pred = nn.Linear(in_channels, num_bbox_reg_classes * 4)

nn.init.normal_(self.cls_score.weight, mean=0, std=0.01)
nn.init.constant_(self.cls_score.bias, 0)

nn.init.normal_(self.bbox_pred.weight, mean=0, std=0.001)
nn.init.constant_(self.bbox_pred.bias, 0)
for l in [self.cls_score, self.bbox_pred]:
nn.init.constant_(l.bias, 0)

def forward(self, x):
x = self.avgpool(x)
@@ -36,11 +33,10 @@ class FPNPredictor(nn.Module):
def __init__(self, cfg, in_channels):
super(FPNPredictor, self).__init__()
num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
representation_size = in_channels

self.cls_score = nn.Linear(representation_size, num_classes)
self.cls_score = nn.Linear(in_channels, num_classes)
num_bbox_reg_classes = 2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes
self.bbox_pred = nn.Linear(representation_size, num_bbox_reg_classes * 4)
self.bbox_pred = nn.Linear(in_channels, num_bbox_reg_classes * 4)

nn.init.normal_(self.cls_score.weight, std=0.01)
nn.init.normal_(self.bbox_pred.weight, std=0.001)
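A standalone sketch, with assumed sizes, of the predictor-head pattern above:

# Classification and box-regression heads over pooled RoI features; sizes assumed.
import torch
from torch import nn

in_channels, num_classes = 1024, 81                 # 80 COCO classes + background (assumed)
cls_score = nn.Linear(in_channels, num_classes)
bbox_pred = nn.Linear(in_channels, num_classes * 4)  # class-specific regression

nn.init.normal_(cls_score.weight, std=0.01)
nn.init.normal_(bbox_pred.weight, std=0.001)
for l in [cls_score, bbox_pred]:
    nn.init.constant_(l.bias, 0)

x = torch.rand(8, in_channels)               # 8 RoI feature vectors
scores, deltas = cls_score(x), bbox_pred(x)  # (8, 81) and (8, 324)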
@@ -9,7 +9,6 @@
class ROIKeypointHead(torch.nn.Module):
def __init__(self, cfg, in_channels):
super(ROIKeypointHead, self).__init__()
self.cfg = cfg.clone()
self.feature_extractor = make_roi_keypoint_feature_extractor(cfg, in_channels)
self.predictor = make_roi_keypoint_predictor(
cfg, self.feature_extractor.out_channels)
@@ -27,10 +26,11 @@ def forward(self, features, proposals, targets=None):
x (Tensor): the result of the feature extractor
proposals (list[BoxList]): during training, the original proposals
are returned. During testing, the predicted boxlists are returned
with the `mask` field set
with the `keypoints` field set
losses (dict[Tensor]): During training, returns the losses for the
head. During testing, returns an empty dict.
"""

if self.training:
with torch.no_grad():
proposals = self.loss_evaluator.subsample(proposals, targets)
@@ -1,18 +1,18 @@
from torch import nn

from maskrcnn_benchmark import layers
from maskrcnn_benchmark.layers import ConvTranspose2d
from maskrcnn_benchmark.layers import interpolate
from maskrcnn_benchmark.modeling import registry


@registry.ROI_KEYPOINT_PREDICTOR.register("KeypointRCNNPredictor")
class KeypointRCNNPredictor(nn.Module):
def __init__(self, cfg, in_channels):
super(KeypointRCNNPredictor, self).__init__()
input_features = in_channels
num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES
deconv_kernel = 4
self.kps_score_lowres = layers.ConvTranspose2d(
input_features,
self.kps_score_lowres = ConvTranspose2d(
in_channels,
num_keypoints,
deconv_kernel,
stride=2,
@@ -27,7 +27,7 @@ def __init__(self, cfg, in_channels):

def forward(self, x):
x = self.kps_score_lowres(x)
x = layers.interpolate(
x = interpolate(
x, scale_factor=self.up_scale, mode="bilinear", align_corners=False
)
return x
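A shape sketch, with assumed sizes, of the predictor above; plain torch.nn is used in place of the maskrcnn_benchmark layers for illustration:

# A 14x14 RoI feature map is deconvolved to 28x28 keypoint logits, then
# bilinearly upsampled by up_scale=2 to 56x56 heatmaps; sizes are assumed.
import torch
from torch import nn
from torch.nn import functional as F

in_channels, num_keypoints, deconv_kernel = 512, 17, 4
kps_score_lowres = nn.ConvTranspose2d(
    in_channels, num_keypoints, deconv_kernel,
    stride=2, padding=deconv_kernel // 2 - 1,
)
x = torch.rand(2, in_channels, 14, 14)
x = kps_score_lowres(x)                                             # (2, 17, 28, 28)
x = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=False)
print(tuple(x.shape))                                               # (2, 17, 56, 56)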