diff --git a/README.md b/README.md index 20fdc55..e1a332f 100644 --- a/README.md +++ b/README.md @@ -100,14 +100,16 @@ python run_net.py --config-file=configs/base.py --task=test | S2ANet-R50-FPN | DOTA1.0|1024/200| flip|-| SGD | 1x | 74.11 | [arxiv](https://arxiv.org/abs/2008.09397)| [config](configs/s2anet/s2anet_r50_fpn_1x_dota.py) | [model](https://cloud.tsinghua.edu.cn/d/918bcbf7a10a40fb8dee/files/?p=%2Fmodels%2Fs2anet_r50_fpn_1x_dota_bs2_steplr_3%2Fckpt_12.pkl&dl=1) | | S2ANet-R50-FPN | DOTA1.0| 1024/200| flip+ra90+bc|-| SGD | 1x | 76.40 | [arxiv](https://arxiv.org/abs/2008.09397)| [config](projects/s2anet/configs/s2anet_r50_fpn_1x_dota_rotate_balance.py) | [model](https://cloud.tsinghua.edu.cn/d/918bcbf7a10a40fb8dee/files/?p=%2Fmodels%2Fs2anet_r50_fpn_1x_dota_rotate_balance%2Fckpt_12.pkl&dl=1) | | S2ANet-R50-FPN | DOTA1.0|1024/200| flip+ra90+bc+ms |ms| SGD | 1x | 79.72 | [arxiv](https://arxiv.org/abs/2008.09397)| [config](projects/s2anet/configs/s2anet_r50_fpn_1x_dota_rotate_balance_ms.py) | [model](https://cloud.tsinghua.edu.cn/d/918bcbf7a10a40fb8dee/files/?p=%2Fmodels%2Fs2anet_r50_fpn_1x_dota_rotate_balance_ms%2Fckpt_12.pkl&dl=1) | -| S2ANet-R101-FPN |DOTA1.0|1024/200|Flip|-| SGD | 1x | 74.28 | [arxiv](https://arxiv.org/abs/2008.09397)| [config](projects/s2anet/configs/s2anet_r101_fpn_1x_dota_bs2.py) | [model](https://cloud.tsinghua.edu.cn/d/918bcbf7a10a40fb8dee/files/?p=%2Fmodels%2Fs2anet_r101_fpn_1x_dota_without_torch_pretrained%2Fckpt_12.pkl&dl=1) | -| Gliding-R50-FPN |DOTA1.0|1024/200|Flip|-| SGD | 1x | 72.93 | [arxiv](https://arxiv.org/abs/1911.09358)| [config](projects/gliding/configs/gliding_r50_fpn_1x_dota_with_flip.py) | [model](https://cloud.tsinghua.edu.cn/f/ebeefa1edaf84a4d8a2a/?dl=1) | -| Gliding-R50-FPN |DOTA1.0|1024/200|Flip+ra90+bc|-| SGD | 1x | 74.93 | [arxiv](https://arxiv.org/abs/1911.09358)| [config](projects/gliding/configs/gliding_r50_fpn_1x_dota_with_flip_rotate_balance_cate.py) | 
[model](https://cloud.tsinghua.edu.cn/f/395ecd3ddaf44bb58ac9/?dl=1) | +| S2ANet-R101-FPN |DOTA1.0|1024/200|flip|-| SGD | 1x | 74.28 | [arxiv](https://arxiv.org/abs/2008.09397)| [config](projects/s2anet/configs/s2anet_r101_fpn_1x_dota_bs2.py) | [model](https://cloud.tsinghua.edu.cn/d/918bcbf7a10a40fb8dee/files/?p=%2Fmodels%2Fs2anet_r101_fpn_1x_dota_without_torch_pretrained%2Fckpt_12.pkl&dl=1) | +| Gliding-R50-FPN |DOTA1.0|1024/200|flip|-| SGD | 1x | 72.93 | [arxiv](https://arxiv.org/abs/1911.09358)| [config](projects/gliding/configs/gliding_r50_fpn_1x_dota_with_flip.py) | [model](https://cloud.tsinghua.edu.cn/f/ebeefa1edaf84a4d8a2a/?dl=1) | +| Gliding-R50-FPN |DOTA1.0|1024/200|flip+ra90+bc|-| SGD | 1x | 74.93 | [arxiv](https://arxiv.org/abs/1911.09358)| [config](projects/gliding/configs/gliding_r50_fpn_1x_dota_with_flip_rotate_balance_cate.py) | [model](https://cloud.tsinghua.edu.cn/f/395ecd3ddaf44bb58ac9/?dl=1) | | RetinaNet-R50-FPN |DOTA1.0|600/150|-|-| SGD | - | 62.503 | [arxiv](https://arxiv.org/abs/1708.02002)| [config](configs/retinanet_r50v1d_fpn_dota.py) | [model](https://cloud.tsinghua.edu.cn/f/f12bb566d4be43bfbdc7/) [pretrained](https://cloud.tsinghua.edu.cn/f/6b5db5fdd5304a5abf19/) | -| FasterRCNN-R50-FPN |DOTA1.0|1024/200|Flip|-| SGD | 1x | 69.631 | [arxiv](https://arxiv.org/abs/1506.01497)| [config](configs/faster_rcnn_obb_r50_fpn_1x_dota.py) | [model](https://cloud.tsinghua.edu.cn/f/29197095057348d0a392/?dl=1) | -| RoITransformer-R50-FPN |DOTA1.0|1024/200|Flip|-| SGD | 1x | 73.842 | [arxiv](https://arxiv.org/abs/1812.00155)| [config](configs/faster_rcnn_RoITrans_r50_fpn_1x_dota.py) | [model](https://cloud.tsinghua.edu.cn/f/55fe6380928f4a6582f8/?dl=1) | +| FasterRCNN-R50-FPN |DOTA1.0|1024/200|flip|-| SGD | 1x | 69.631 | [arxiv](https://arxiv.org/abs/1506.01497)| [config](configs/faster_rcnn_obb_r50_fpn_1x_dota.py) | [model](https://cloud.tsinghua.edu.cn/f/29197095057348d0a392/?dl=1) | +| RoITransformer-R50-FPN |DOTA1.0|1024/200|flip|-| SGD | 1x | 73.842 
| [arxiv](https://arxiv.org/abs/1812.00155)| [config](configs/faster_rcnn_RoITrans_r50_fpn_1x_dota.py) | [model](https://cloud.tsinghua.edu.cn/f/55fe6380928f4a6582f8/?dl=1) | | FCOS-R50-FPN | DOTA1.0|1024/200| flip|-| SGD | 1x | 70.40 | [ICCV19](https://openaccess.thecvf.com/content_ICCV_2019/papers/Tian_FCOS_Fully_Convolutional_One-Stage_Object_Detection_ICCV_2019_paper.pdf)| [config](configs/fcos_obb_r50_fpn_1x_dota.py) | [model](https://cloud.tsinghua.edu.cn/d/918bcbf7a10a40fb8dee/files/?p=%2Fmodels%2Ffcos_r50%2Fckpt_12.pkl&dl=1) | -| OrientedRCNN-R50-FPN |DOTA1.0|1024/200|Flip|-| SGD | 1x | 75.62 | [ICCV21](https://openaccess.thecvf.com/content/ICCV2021/papers/Xie_Oriented_R-CNN_for_Object_Detection_ICCV_2021_paper.pdf)| [config](configs/oriented_rcnn_r50_fpn_1x_dota_with_flip.py) | [model](https://cloud.tsinghua.edu.cn/f/a50517f7b8e840949d3f/?dl=1) | +| OrientedRCNN-R50-FPN |DOTA1.0|1024/200|flip|-| SGD | 1x | 75.62 | [ICCV21](https://openaccess.thecvf.com/content/ICCV2021/papers/Xie_Oriented_R-CNN_for_Object_Detection_ICCV_2021_paper.pdf)| [config](configs/oriented_rcnn_r50_fpn_1x_dota_with_flip.py) | [model](https://cloud.tsinghua.edu.cn/f/a50517f7b8e840949d3f/?dl=1) | +| OrientedRCNN-R50-FPN |DOTA1.0|1024/200|flip+rr+bc|-| SGD | 1x | 77.76 | [ICCV21](https://openaccess.thecvf.com/content/ICCV2021/papers/Xie_Oriented_R-CNN_for_Object_Detection_ICCV_2021_paper.pdf)| [config](configs/oriented_rcnn_r50_fpn_1x_dota_with_flip_rotate_balance_cate.py) | [model](https://cloud.tsinghua.edu.cn/f/df64dd4980f84433a592/?dl=1) | +| OrientedRCNN-R50-FPN |DOTA1.0|1024/200|ms+flip+rr+bc|-| SGD | 1x | 80.13 | [ICCV21](https://openaccess.thecvf.com/content/ICCV2021/papers/Xie_Oriented_R-CNN_for_Object_Detection_ICCV_2021_paper.pdf)| [config](configs/oriented_rcnn_r50_fpn_1x_dota_ms_with_flip_rotate_balance_cate.py) | [model](https://cloud.tsinghua.edu.cn/f/a3843215d6194ae8823b/?dl=1) | **Notice**: diff --git a/configs/gliding_r50_fpn_1x_dota_with_flip_rotate_balance_cate.py 
b/configs/gliding_r50_fpn_1x_dota_with_flip_rotate_balance_cate.py index 17bc339..5214431 100644 --- a/configs/gliding_r50_fpn_1x_dota_with_flip_rotate_balance_cate.py +++ b/configs/gliding_r50_fpn_1x_dota_with_flip_rotate_balance_cate.py @@ -130,6 +130,9 @@ dict( type="RandomRotateAug", random_rotate_on=True, + rotate_90=False, + angles=(0, 90), + vert_rate=0.5 ), dict( type = "Pad", diff --git a/configs/oriented_rcnn_r50_fpn_1x_dota_ms_with_flip_rotate_balance_cate.py b/configs/oriented_rcnn_r50_fpn_1x_dota_ms_with_flip_rotate_balance_cate.py new file mode 100644 index 0000000..3c80720 --- /dev/null +++ b/configs/oriented_rcnn_r50_fpn_1x_dota_ms_with_flip_rotate_balance_cate.py @@ -0,0 +1,213 @@ +# model settings +model = dict( + type='OrientedRCNN', + backbone=dict( + type='Resnet50', + frozen_stages=1, + return_stages=["layer1","layer2","layer3","layer4"], + pretrained= True), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn = dict( + type = "OrientedRPNHead", + in_channels=256, + num_classes=1, + min_bbox_size=0, + nms_thresh=0.8, + nms_pre=2000, + nms_post=2000, + feat_channels=256, + bbox_type='obb', + reg_dim=6, + background_label=0, + reg_decoded_bbox=False, + pos_weight=-1, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict( + type='MidpointOffsetCoder', + target_means=[.0, .0, .0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0, 0.5, 0.5]), + loss_cls=dict(type='CrossEntropyLossForRcnn', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1, + match_low_quality=True, + assigned_labels_filled=-1, + ), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False) + ), + bbox_head=dict( + 
type='OrientedHead', + num_classes=15, + in_channels=256, + fc_out_channels=1024, + score_thresh=0.05, + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + ignore_iof_thr=-1, + match_low_quality=False, + assigned_labels_filled=-1, + iou_calculator=dict(type='BboxOverlaps2D_rotated_v1')), + sampler=dict( + type='RandomSamplerRotated', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + bbox_coder=dict( + type='OrientedDeltaXYWHTCoder', + target_means=[0., 0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2, 0.1]), + bbox_roi_extractor=dict( + type='OrientedSingleRoIExtractor', + roi_layer=dict(type='ROIAlignRotated_v1', output_size=7, sampling_ratio=2), + out_channels=256, + extend_factor=(1.4, 1.2), + featmap_strides=[4, 8, 16, 32]), + loss_cls=dict( + type='CrossEntropyLoss', + ), + loss_bbox=dict( + type='SmoothL1Loss', + beta=1.0, + loss_weight=1.0 + ), + with_bbox=True, + with_shared_head=False, + with_avg_pool=False, + with_cls=True, + with_reg=True, + start_bbox_type='obb', + end_bbox_type='obb', + reg_dim=None, + reg_class_agnostic=True, + reg_decoded_bbox=False, + pos_weight=-1, + ) + ) + +dataset = dict( + train=dict( + type="DOTADataset", + dataset_dir='/mnt/disk/cxjyxx_me/JAD/datasets/processed_DOTA/trainval_1024_500_0.5-1.0-1.5', + transforms=[ + dict( + type="RotatedResize", + min_size=1024, + max_size=1024 + ), + dict( + type='RotatedRandomFlip', + direction="horizontal", + prob=0.5), + dict( + type='RotatedRandomFlip', + direction="vertical", + prob=0.5), + dict( + type="RandomRotateAug", + random_rotate_on=True, + rotate_90=False, + angles=(0, 90), + vert_rate=0.5, + ), + dict( + type = "Pad", + size_divisor=32), + dict( + type = "Normalize", + mean = [123.675, 116.28, 103.53], + std = [58.395, 57.12, 57.375], + to_bgr=False,), + dict( + type = "FliterEmpty", + fliter_list = ["rboxes"],), + + ], + batch_size=2, + num_workers=4, + shuffle=True, + filter_empty_gt=False, + 
balance_category=True + ), + val=dict( + type="DOTADataset", + dataset_dir='/home/cxjyxx_me/workspace/JAD/datasets/processed_DOTA/trainval_1024_500_0.5-1.0-1.5', + transforms=[ + dict( + type="RotatedResize", + min_size=1024, + max_size=1024 + ), + dict( + type = "Pad", + size_divisor=32), + dict( + type = "Normalize", + mean = [123.675, 116.28, 103.53], + std = [58.395, 57.12, 57.375], + to_bgr=False,), + ], + batch_size=2, + num_workers=4, + shuffle=False + ), + test=dict( + type="ImageDataset", + images_dir='/mnt/disk/cxjyxx_me/JAD/datasets/processed_DOTA/test_1024_500_0.5-1.0-1.5/images', + transforms=[ + dict( + type="RotatedResize", + min_size=1024, + max_size=1024 + ), + dict( + type = "Pad", + size_divisor=32), + dict( + type = "Normalize", + mean = [123.675, 116.28, 103.53], + std = [58.395, 57.12, 57.375], + to_bgr=False,), + ], + num_workers=4, + batch_size=1, + ) +) + +optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0001, grad_clip=dict(max_norm=35, norm_type=2)) + +scheduler = dict( + type='StepLR', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + milestones=[7, 10]) + +logger = dict( + type="RunLogger") + +# when we the trained model from cshuan, image is rgb +max_epoch = 12 +eval_interval = 100 +checkpoint_interval = 1 +log_interval = 50 \ No newline at end of file diff --git a/configs/oriented_rcnn_r50_fpn_1x_dota_with_flip.py b/configs/oriented_rcnn_r50_fpn_1x_dota_with_flip.py index b6d669a..b29e917 100644 --- a/configs/oriented_rcnn_r50_fpn_1x_dota_with_flip.py +++ b/configs/oriented_rcnn_r50_fpn_1x_dota_with_flip.py @@ -108,7 +108,7 @@ dataset = dict( train=dict( type="DOTADataset", - dataset_dir='/home/cxjyxx_me/workspace/JAD/datasets/processed_DOTA/trainval_1024_200_1.0', + dataset_dir='/mnt/disk/cxjyxx_me/JAD/datasets/processed_DOTA/trainval_1024_200_1.0', transforms=[ dict( type="RotatedResize", @@ -126,6 +126,9 @@ # dict( # type="RandomRotateAug", # random_rotate_on=True, + # rotate_90=False, + # angles=(0, 
90), + # vert_rate=0.5 # ), dict( type = "Pad", @@ -135,7 +138,6 @@ mean = [123.675, 116.28, 103.53], std = [58.395, 57.12, 57.375], to_bgr=False,) - ], batch_size=2, num_workers=4, @@ -167,7 +169,7 @@ ), test=dict( type="ImageDataset", - images_dir='/home/cxjyxx_me/workspace/JAD/datasets/processed_DOTA/test_1024_200_1.0/images/', + images_dir='/mnt/disk/cxjyxx_me/JAD/datasets/processed_DOTA/test_1024_200_1.0/images/', transforms=[ dict( type="RotatedResize", diff --git a/configs/oriented_rcnn_r50_fpn_1x_dota_with_flip_rotate_balance_cate.py b/configs/oriented_rcnn_r50_fpn_1x_dota_with_flip_rotate_balance_cate.py new file mode 100644 index 0000000..4914e58 --- /dev/null +++ b/configs/oriented_rcnn_r50_fpn_1x_dota_with_flip_rotate_balance_cate.py @@ -0,0 +1,212 @@ +# model settings +model = dict( + type='OrientedRCNN', + backbone=dict( + type='Resnet50', + frozen_stages=1, + return_stages=["layer1","layer2","layer3","layer4"], + pretrained= True), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn = dict( + type = "OrientedRPNHead", + in_channels=256, + num_classes=1, + min_bbox_size=0, + nms_thresh=0.8, + nms_pre=2000, + nms_post=2000, + feat_channels=256, + bbox_type='obb', + reg_dim=6, + background_label=0, + reg_decoded_bbox=False, + pos_weight=-1, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict( + type='MidpointOffsetCoder', + target_means=[.0, .0, .0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0, 0.5, 0.5]), + loss_cls=dict(type='CrossEntropyLossForRcnn', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1, + match_low_quality=True, + assigned_labels_filled=-1, + ), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + 
neg_pos_ub=-1, + add_gt_as_proposals=False) + ), + bbox_head=dict( + type='OrientedHead', + num_classes=15, + in_channels=256, + fc_out_channels=1024, + score_thresh=0.05, + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + ignore_iof_thr=-1, + match_low_quality=False, + assigned_labels_filled=-1, + iou_calculator=dict(type='BboxOverlaps2D_rotated_v1')), + sampler=dict( + type='RandomSamplerRotated', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + bbox_coder=dict( + type='OrientedDeltaXYWHTCoder', + target_means=[0., 0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2, 0.1]), + bbox_roi_extractor=dict( + type='OrientedSingleRoIExtractor', + roi_layer=dict(type='ROIAlignRotated_v1', output_size=7, sampling_ratio=2), + out_channels=256, + extend_factor=(1.4, 1.2), + featmap_strides=[4, 8, 16, 32]), + loss_cls=dict( + type='CrossEntropyLoss', + ), + loss_bbox=dict( + type='SmoothL1Loss', + beta=1.0, + loss_weight=1.0 + ), + with_bbox=True, + with_shared_head=False, + with_avg_pool=False, + with_cls=True, + with_reg=True, + start_bbox_type='obb', + end_bbox_type='obb', + reg_dim=None, + reg_class_agnostic=True, + reg_decoded_bbox=False, + pos_weight=-1, + ) + ) + +dataset = dict( + train=dict( + type="DOTADataset", + dataset_dir='/mnt/disk/cxjyxx_me/JAD/datasets/processed_DOTA/trainval_1024_200_1.0', + transforms=[ + dict( + type="RotatedResize", + min_size=1024, + max_size=1024 + ), + dict( + type='RotatedRandomFlip', + direction="horizontal", + prob=0.5), + dict( + type='RotatedRandomFlip', + direction="vertical", + prob=0.5), + dict( + type="RandomRotateAug", + random_rotate_on=True, + rotate_90=False, + angles=(0, 90), + vert_rate=0.5 + ), + dict( + type = "Pad", + size_divisor=32), + dict( + type = "Normalize", + mean = [123.675, 116.28, 103.53], + std = [58.395, 57.12, 57.375], + to_bgr=False,), + dict( + type = "FliterEmpty", + fliter_list = ["rboxes"],), + ], + batch_size=2, + 
num_workers=4, + shuffle=True, + filter_empty_gt=False, + balance_category=True + ), + val=dict( + type="DOTADataset", + dataset_dir='/home/cxjyxx_me/workspace/JAD/datasets/processed_DOTA/trainval_1024_200_1.0', + transforms=[ + dict( + type="RotatedResize", + min_size=1024, + max_size=1024 + ), + dict( + type = "Pad", + size_divisor=32), + dict( + type = "Normalize", + mean = [123.675, 116.28, 103.53], + std = [58.395, 57.12, 57.375], + to_bgr=False,), + ], + batch_size=2, + num_workers=4, + shuffle=False + ), + test=dict( + type="ImageDataset", + images_dir='/mnt/disk/cxjyxx_me/JAD/datasets/processed_DOTA/test_1024_200_1.0/images/', + transforms=[ + dict( + type="RotatedResize", + min_size=1024, + max_size=1024 + ), + dict( + type = "Pad", + size_divisor=32), + dict( + type = "Normalize", + mean = [123.675, 116.28, 103.53], + std = [58.395, 57.12, 57.375], + to_bgr=False,), + ], + num_workers=4, + batch_size=1, + ) +) + +optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0001, grad_clip=dict(max_norm=35, norm_type=2)) + +scheduler = dict( + type='StepLR', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + milestones=[7, 10]) + +logger = dict( + type="RunLogger") + +# when we the trained model from cshuan, image is rgb +max_epoch = 12 +eval_interval = 100 +checkpoint_interval = 1 +log_interval = 50 \ No newline at end of file diff --git a/projects/gliding/configs/gliding_r101_fpn_1x_dota_with_flip_rotate_balance_cate.py b/projects/gliding/configs/gliding_r101_fpn_1x_dota_with_flip_rotate_balance_cate.py index c14c6b5..6e43ec9 100644 --- a/projects/gliding/configs/gliding_r101_fpn_1x_dota_with_flip_rotate_balance_cate.py +++ b/projects/gliding/configs/gliding_r101_fpn_1x_dota_with_flip_rotate_balance_cate.py @@ -125,12 +125,20 @@ min_size=1024, max_size=1024 ), + dict( + type='RotatedRandomFlip', + direction="horizontal", + prob=0.5), dict( type='RotatedRandomFlip', + direction="vertical", prob=0.5), dict( type="RandomRotateAug", 
random_rotate_on=True, + rotate_90=False, + angles=(0, 90), + vert_rate=0.5, ), dict( type = "Pad", diff --git a/projects/gliding/configs/gliding_r50_fpn_1x_dota_with_flip_rotate_balance_cate.py b/projects/gliding/configs/gliding_r50_fpn_1x_dota_with_flip_rotate_balance_cate.py index f00aca3..2a084a9 100644 --- a/projects/gliding/configs/gliding_r50_fpn_1x_dota_with_flip_rotate_balance_cate.py +++ b/projects/gliding/configs/gliding_r50_fpn_1x_dota_with_flip_rotate_balance_cate.py @@ -124,12 +124,20 @@ min_size=1024, max_size=1024 ), + dict( + type='RotatedRandomFlip', + direction="horizontal", + prob=0.5), dict( type='RotatedRandomFlip', + direction="vertical", prob=0.5), dict( type="RandomRotateAug", random_rotate_on=True, + rotate_90=False, + angles=(0, 90), + vert_rate=0.5, ), dict( type = "Pad", diff --git a/projects/oriented_rcnn/README.md b/projects/oriented_rcnn/README.md index af61f23..d26dc47 100644 --- a/projects/oriented_rcnn/README.md +++ b/projects/oriented_rcnn/README.md @@ -12,4 +12,6 @@ python run_net.py --config-file=configs/oriented_rcnn_r50_fpn_1x_dota_with_flip. 
| Models | Dataset| Sub_Image_Size/Overlap |Train Aug | Test Aug | Optim | Lr schd | mAP | Paper | Config | Download | | :-----------: | :-----: |:-----:|:-----:| :-----: | :-----:| :-----:| :----: |:--------:|:--------: | :--------: | -| OrientedRCNN-R50-FPN |DOTA1.0|1024/200|Flip|-| SGD | 1x | 75.62 | [ICCV21](https://openaccess.thecvf.com/content/ICCV2021/papers/Xie_Oriented_R-CNN_for_Object_Detection_ICCV_2021_paper.pdf)| [config](configs/oriented_rcnn_r50_fpn_1x_dota_with_flip.py) | [model](https://cloud.tsinghua.edu.cn/f/a50517f7b8e840949d3f/?dl=1) | \ No newline at end of file +| OrientedRCNN-R50-FPN |DOTA1.0|1024/200|flip|-| SGD | 1x | 75.62 | [ICCV21](https://openaccess.thecvf.com/content/ICCV2021/papers/Xie_Oriented_R-CNN_for_Object_Detection_ICCV_2021_paper.pdf)| [config](configs/oriented_rcnn_r50_fpn_1x_dota_with_flip.py) | [model](https://cloud.tsinghua.edu.cn/f/a50517f7b8e840949d3f/?dl=1) | +| OrientedRCNN-R50-FPN |DOTA1.0|1024/200|flip+rr+bc|-| SGD | 1x | 77.76 | [ICCV21](https://openaccess.thecvf.com/content/ICCV2021/papers/Xie_Oriented_R-CNN_for_Object_Detection_ICCV_2021_paper.pdf)| [config](configs/oriented_rcnn_r50_fpn_1x_dota_with_flip_rotate_balance_cate.py) | [model](https://cloud.tsinghua.edu.cn/f/df64dd4980f84433a592/?dl=1) | +| OrientedRCNN-R50-FPN |DOTA1.0|1024/200|ms+flip+rr+bc|-| SGD | 1x | 80.13 | [ICCV21](https://openaccess.thecvf.com/content/ICCV2021/papers/Xie_Oriented_R-CNN_for_Object_Detection_ICCV_2021_paper.pdf)| [config](configs/oriented_rcnn_r50_fpn_1x_dota_ms_with_flip_rotate_balance_cate.py) | [model](https://cloud.tsinghua.edu.cn/f/a3843215d6194ae8823b/?dl=1) | \ No newline at end of file diff --git a/projects/oriented_rcnn/configs/oriented_rcnn_r101_fpn_1x_dota_with_flip.py b/projects/oriented_rcnn/configs/oriented_rcnn_r101_fpn_1x_dota_with_flip.py index e04caec..0c0aa24 100644 --- a/projects/oriented_rcnn/configs/oriented_rcnn_r101_fpn_1x_dota_with_flip.py +++ 
b/projects/oriented_rcnn/configs/oriented_rcnn_r101_fpn_1x_dota_with_flip.py @@ -126,6 +126,9 @@ # dict( # type="RandomRotateAug", # random_rotate_on=True, + # rotate_90=False, + # angles=(0, 90), + # vert_rate=0.5 # ), dict( type = "Pad", diff --git a/projects/oriented_rcnn/configs/oriented_rcnn_r50_fpn_1x_dota_ms_with_flip_rotate_balance_cate.py b/projects/oriented_rcnn/configs/oriented_rcnn_r50_fpn_1x_dota_ms_with_flip_rotate_balance_cate.py index 4f6c9ee..7734087 100644 --- a/projects/oriented_rcnn/configs/oriented_rcnn_r50_fpn_1x_dota_ms_with_flip_rotate_balance_cate.py +++ b/projects/oriented_rcnn/configs/oriented_rcnn_r50_fpn_1x_dota_ms_with_flip_rotate_balance_cate.py @@ -126,6 +126,9 @@ dict( type="RandomRotateAug", random_rotate_on=True, + rotate_90=False, + angles=(0, 90), + vert_rate=0.5, ), dict( type = "Pad", @@ -134,7 +137,10 @@ type = "Normalize", mean = [123.675, 116.28, 103.53], std = [58.395, 57.12, 57.375], - to_bgr=False,) + to_bgr=False,), + dict( + type = "FliterEmpty", + fliter_list = ["rboxes"],), ], batch_size=2, diff --git a/projects/oriented_rcnn/configs/oriented_rcnn_r50_fpn_1x_dota_with_flip.py b/projects/oriented_rcnn/configs/oriented_rcnn_r50_fpn_1x_dota_with_flip.py index b6d669a..9e81964 100644 --- a/projects/oriented_rcnn/configs/oriented_rcnn_r50_fpn_1x_dota_with_flip.py +++ b/projects/oriented_rcnn/configs/oriented_rcnn_r50_fpn_1x_dota_with_flip.py @@ -126,6 +126,9 @@ # dict( # type="RandomRotateAug", # random_rotate_on=True, + # rotate_90=False, + # angles=(0, 90), + # vert_rate=0.5 # ), dict( type = "Pad", diff --git a/projects/oriented_rcnn/configs/oriented_rcnn_r50_fpn_1x_dota_with_flip_rotate_balance_cate.py b/projects/oriented_rcnn/configs/oriented_rcnn_r50_fpn_1x_dota_with_flip_rotate_balance_cate.py index 4ee725f..4914e58 100644 --- a/projects/oriented_rcnn/configs/oriented_rcnn_r50_fpn_1x_dota_with_flip_rotate_balance_cate.py +++ 
b/projects/oriented_rcnn/configs/oriented_rcnn_r50_fpn_1x_dota_with_flip_rotate_balance_cate.py @@ -108,19 +108,27 @@ dataset = dict( train=dict( type="DOTADataset", - dataset_dir='/home/cxjyxx_me/workspace/JAD/datasets/processed_DOTA/trainval_1024_200_1.0', + dataset_dir='/mnt/disk/cxjyxx_me/JAD/datasets/processed_DOTA/trainval_1024_200_1.0', transforms=[ dict( type="RotatedResize", min_size=1024, max_size=1024 ), + dict( + type='RotatedRandomFlip', + direction="horizontal", + prob=0.5), dict( type='RotatedRandomFlip', + direction="vertical", prob=0.5), dict( type="RandomRotateAug", random_rotate_on=True, + rotate_90=False, + angles=(0, 90), + vert_rate=0.5 ), dict( type = "Pad", @@ -129,8 +137,10 @@ type = "Normalize", mean = [123.675, 116.28, 103.53], std = [58.395, 57.12, 57.375], - to_bgr=False,) - + to_bgr=False,), + dict( + type = "FliterEmpty", + fliter_list = ["rboxes"],), ], batch_size=2, num_workers=4, @@ -162,7 +172,7 @@ ), test=dict( type="ImageDataset", - images_dir='/home/cxjyxx_me/workspace/JAD/datasets/processed_DOTA/test_1024_200_1.0/images/', + images_dir='/mnt/disk/cxjyxx_me/JAD/datasets/processed_DOTA/test_1024_200_1.0/images/', transforms=[ dict( type="RotatedResize", diff --git a/python/jdet/data/custom.py b/python/jdet/data/custom.py index cf967c0..f254a68 100644 --- a/python/jdet/data/custom.py +++ b/python/jdet/data/custom.py @@ -103,7 +103,7 @@ def collate_batch(self,batch): batch_imgs = np.zeros((N,3,max_height,max_width),dtype=np.float32) for i,image in enumerate(imgs): batch_imgs[i,:,:image.shape[-2],:image.shape[-1]] = image - + return batch_imgs,anns def __getitem__(self, idx): @@ -114,6 +114,9 @@ def __getitem__(self, idx): if self.transforms is not None: image, anno = self.transforms(image, anno) + if anno is None: + return self.__getitem__(np.random.choice(np.arange(self.total_len))) + return image, anno def evaluate(self,results,work_dir,epoch,logger=None): diff --git a/python/jdet/data/transforms.py 
b/python/jdet/data/transforms.py index 508c844..1e6eb52 100644 --- a/python/jdet/data/transforms.py +++ b/python/jdet/data/transforms.py @@ -1,4 +1,5 @@ import random + import jittor as jt import cv2 import numpy as np @@ -7,6 +8,9 @@ from jdet.utils.registry import build_from_cfg,TRANSFORMS from jdet.models.boxes.box_ops import rotated_box_to_poly_np,poly_to_rotated_box_np,norm_angle from jdet.models.boxes.iou_calculator import bbox_overlaps_np +from jdet.ops.bbox_move import warp +from jdet.ops.bbox_transforms_numpy import bbox2type +from jdet.ops.bbox_geometry import bbox_overlaps from numpy import random as nprandom @TRANSFORMS.register_module() @@ -30,16 +34,53 @@ def __call__(self, image, target=None): @TRANSFORMS.register_module() class RandomRotateAug: - def __init__(self, random_rotate_on=False): + def __init__(self, + random_rotate_on=False, + rotate_90=True, + keep_shape=True, + angles=(0, 90), + rotate_mode='range', + vert_rate=0.5, + vert_cls=None, + label_standard="rboxes", + keep_iof_thr=0.7): + self.random_rotate_on = random_rotate_on - - def _rotate_boxes_90(self,target,size): - w,h = size - for key in["bboxes","hboxes","rboxes","polys","hboxes_ignore","polys_ignore","rboxes_ignore"]: + self.rotate_90 = rotate_90 + self.keep_shape = keep_shape + self.angles = angles + self.rotate_mode = rotate_mode + self.vert_rate = vert_rate + self.vert_cls = vert_cls + self.keep_iof_thr = keep_iof_thr + self.label_standard = label_standard + + def get_matrix_and_size(self, target): + angle = target["rotate_angle"] + height, width = target["img_size"][:2] + if self.keep_shape: + center = ((width - 1) * 0.5, (height - 1) * 0.5) + matrix = cv2.getRotationMatrix2D(center, angle, 1) + else: + matrix = cv2.getRotationMatrix2D((0, 0), angle, 1) + img_bbox = np.array([[0, 0, width, 0, width, height, 0, width]]) + img_bbox = bbox2type(warp(img_bbox, matrix), 'hbb') + + width = int(img_bbox[0, 2] - img_bbox[0, 0] + 1) + height = int(img_bbox[0, 3] - img_bbox[0, 1] + 1) + 
matrix[0, 2] = -img_bbox[0, 0] + matrix[1, 2] = -img_bbox[0, 1] + return matrix, width, height + + def _rotate_boxes_90(self, target,size): + + w, h = size + for key in["bboxes", "hboxes", "rboxes", "polys", "hboxes_ignore", "polys_ignore", "rboxes_ignore"]: + if key not in target: continue bboxes = target[key] - if bboxes.ndim<2: + if bboxes.ndim < 2: continue if "bboxes" in key or "hboxes" in key: new_boxes = np.zeros_like(bboxes) @@ -60,19 +101,81 @@ def _rotate_boxes_90(self,target,size): if "rboxes" in key: new_bboxes = poly_to_rotated_box_np(new_bboxes) - target[key]=new_bboxes + target[key] = new_bboxes + + def _rotate_boxes_rand(self, target, matrix, w, h, img_bound): + + for key in ["bboxes", "hboxes", "rboxes", "polys",]: + if key not in target: + continue + + bboxes = target[key] + if bboxes.ndim < 2: + continue + warped_bboxes = warp(target[key], matrix, keep_type=True) + if self.keep_shape: + iofs = bbox_overlaps(warped_bboxes, img_bound, mode='iof') + if_inwindow = iofs[:, 0] > self.keep_iof_thr + new_bboxes = warped_bboxes[if_inwindow] + + if key == self.label_standard: + label_if_inwindow = if_inwindow.copy() + + target[key] = new_bboxes + + if "labels" in target.keys(): + target['labels'] = target['labels'][label_if_inwindow] def __call__( self, image, target=None ): - # (0, 90, 180, or 270) + if self.random_rotate_on: - indx = int(random.random() * 100) // 25 - # anticlockwise - for _ in range(indx): + + vert = False + if self.vert_cls is not None: + if "cls" not in target: + raise ValueError('need class order when vert_cls is not None') + vert_lbls = [target["cls"].index(c) for c in self.vert_cls] + if "labels" in target: + labels = target["labels"] + for i in vert_lbls: + if (labels == i).any(): + vert = True + + vert = True if np.random.rand() < self.vert_rate else vert + + if self.rotate_90 or vert == True: + # (0, 90, 180, or 270) + indx = int(random.random() * 100) // 25 + # anticlockwise + for _ in range(indx): + if target is not None: + 
self._rotate_boxes_90(target , image.size) + image = image.rotate(90, expand=True) if target is not None: - self._rotate_boxes_90(target,image.size) - image = image.rotate(90,expand=True) - if target is not None: - target["rotate_angle"]=90*indx + target["rotate_angle"] = 90 * indx + + else: + + target["rboxes"][:, 4] *= -1 + if self.rotate_mode == 'value': + angles = list(self.angles) + angles = angles + [0] if 0 not in angles else angles + np.random.shuffle(angles) + angle = angles[0] + else: + angle_min, angle_max = min(self.angles), max(self.angles) + angle = (angle_max - angle_min) * np.random.rand() + angle_min + + if target is not None: + target["rotate_angle"] = angle + + if angle != 0: + matrix, w, h = self.get_matrix_and_size(target) + + img_bound = np.array([[0, 0, w, 0, w, h, 0, h]]) + self._rotate_boxes_rand(target, matrix, w, h, img_bound) + image = image.rotate(angle) + target["rboxes"][:, 4] *= -1 return image, target @@ -485,3 +588,17 @@ def __call__(self, image, target=None): target["to_bgr"] = self.to_bgr return image, target +@TRANSFORMS.register_module() +class FliterEmpty: + + def __init__(self, fliter_list): + self.fliter_list = fliter_list + + def __call__(self, image, target=None): + + for k in self.fliter_list: + if k == "rboxes" or k == "hboxes" or k == "polys" or k == "bboxes": + if target[k].size == 0: + return image, None + + return image, target diff --git a/python/jdet/ops/bbox_geometry.py b/python/jdet/ops/bbox_geometry.py index a0c671b..56e1af3 100644 --- a/python/jdet/ops/bbox_geometry.py +++ b/python/jdet/ops/bbox_geometry.py @@ -1,7 +1,7 @@ import numpy as np import shapely.geometry as shgeo -from jdet.ops.bbox_transforms import * +from jdet.ops.bbox_transforms_numpy import * def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False, eps=1e-6): assert mode in ['iou', 'iof'] diff --git a/python/jdet/ops/bbox_move.py b/python/jdet/ops/bbox_move.py new file mode 100644 index 0000000..2786264 --- /dev/null +++ 
b/python/jdet/ops/bbox_move.py @@ -0,0 +1,67 @@ +import numpy as np + +from jdet.ops.bbox_transforms_numpy import bbox2type, get_bbox_type, regular_obb + +def translate(bboxes, x, y): + assert get_bbox_type(bboxes) != 'notype' + + if get_bbox_type(bboxes) == 'obb': + translated = bboxes.copy() + translated[..., :2] = translated[..., :2] + \ + np.array([x, y], dtype=np.float32) + else: + dim = bboxes.shape[-1] + translated = bboxes + \ + np.array([x, y]*int(dim/2), dtype=np.float32) + return translated + + +def flip(bboxes, W, H, direction='horizontal'): + assert get_bbox_type(bboxes) != 'notype' + assert direction in ['horizontal', 'vertical'] + + flipped = bboxes.copy() + if get_bbox_type(bboxes) == 'poly': + if direction == 'horizontal': + flipped[..., 0::2] = W - bboxes[..., 0::2] + else: + flipped[..., 1::2] = H - bboxes[..., 1::2] + + if get_bbox_type(bboxes) == 'obb': + if direction == 'horizontal': + flipped[..., 0] = W - bboxes[..., 0] + else: + flipped[..., 1] = H - bboxes[..., 1] + flipped[..., 4] = -flipped[..., 4] + flipped = regular_obb(flipped) + + if get_bbox_type(bboxes) == 'hbb': + if direction == 'horizontal': + flipped[..., 0::4] = W - bboxes[..., 2::4] + flipped[..., 2::4] = W - bboxes[..., 0::4] + else: + flipped[..., 1::4] = H - bboxes[..., 3::4] + flipped[..., 3::4] = H - bboxes[..., 1::4] + return flipped + + +def warp(bboxes, M, keep_type=False): + + ori_type = get_bbox_type(bboxes) + assert ori_type != 'notype' + assert M.ndim == 2 + + polys = bbox2type(bboxes, 'poly') + + shape = polys.shape + group_pts = polys.reshape(*shape[:-1], shape[-1]//2, 2) + group_pts = np.insert(group_pts, 2, 1, axis=-1) + warped_pts = np.matmul(group_pts, M.T) + + if M.shape[0] == 3: + warped_pts = (warped_pts / warped_pts[..., -1:])[..., :-1] + warped_pts = warped_pts.reshape(*shape) + + if keep_type: + warped_pts = bbox2type(warped_pts, ori_type) + return warped_pts diff --git a/python/jdet/ops/bbox_transforms.py b/python/jdet/ops/bbox_transforms.py index 
3b47535..126bee4 100644 --- a/python/jdet/ops/bbox_transforms.py +++ b/python/jdet/ops/bbox_transforms.py @@ -4,6 +4,8 @@ import math import copy #TODO: remove copy(?) +pi = np.pi + def dbbox2delta_v3(proposals, gt, means = [0, 0, 0, 0, 0], stds=[1, 1, 1, 1, 1]): proposals = proposals.float() gt = gt.float() @@ -38,7 +40,7 @@ def hbb2obb_v2(boxes): ex_ctr_x = boxes[..., 0] + 0.5 * (ex_heights - 1.0) ex_ctr_y = boxes[..., 1] + 0.5 * (ex_widths - 1.0) c_bboxes = jt.contrib.concat((ex_ctr_x.unsqueeze(1), ex_ctr_y.unsqueeze(1), ex_widths.unsqueeze(1), ex_heights.unsqueeze(1)), 1) - initial_angles = -jt.ones((num_boxes, 1)) * np.pi / 2 + initial_angles = -jt.ones((num_boxes, 1)) * pi / 2 dbboxes = jt.contrib.concat((c_bboxes, initial_angles), 1) return dbboxes @@ -120,7 +122,7 @@ def polygonToRotRectangle_batch(bbox, with_module=True): h = h[:, np.newaxis] # TODO: check it if with_module: - angle = angle[:, np.newaxis] % ( 2 * np.pi) + angle = angle[:, np.newaxis] % ( 2 * pi) else: angle = angle[:, np.newaxis] dboxes = np.concatenate((center[:, 0].astype(np.float), center[:, 1].astype(np.float), w, h, angle), axis=1) @@ -224,7 +226,7 @@ def dbbox2delta_v2(proposals, gt, means = [0, 0, 0, 0, 0], stds=[1, 1, 1, 1, 1]) inds = jt.sin(targets_dangle) < 0 dist[inds] = -dist[inds] # TODO: change the norm value - dist = dist / (np.pi / 2.) + dist = dist / (pi / 2.) 
deltas = jt.stack((targets_dx, targets_dy, targets_dw, targets_dh, dist), -1) @@ -239,19 +241,19 @@ def choose_best_match_batch(Rrois, gt_rois): gt_xs, gt_ys, gt_ws, gt_hs, gt_angles = gt_rois[:, 0].copy(), gt_rois[:, 1].copy(), gt_rois[:, 2].copy(), gt_rois[:, 3].copy(), gt_rois[:, 4].copy() - gt_angle_extent = jt.contrib.concat((gt_angles[:, np.newaxis], (gt_angles + np.pi/2.)[:, np.newaxis], - (gt_angles + np.pi)[:, np.newaxis], (gt_angles + np.pi * 3/2.)[:, np.newaxis]), 1) - dist = (Rroi_angles - gt_angle_extent) % (2 * np.pi) - dist = jt.minimum(dist, np.pi * 2 - dist) + gt_angle_extent = jt.contrib.concat((gt_angles[:, np.newaxis], (gt_angles + pi/2.)[:, np.newaxis], + (gt_angles + pi)[:, np.newaxis], (gt_angles + pi * 3/2.)[:, np.newaxis]), 1) + dist = (Rroi_angles - gt_angle_extent) % (2 * pi) + dist = jt.minimum(dist, pi * 2 - dist) min_index = jt.argmin(dist, 1)[0] gt_rois_extent0 = copy.deepcopy(gt_rois) gt_rois_extent1 = jt.contrib.concat((gt_xs.unsqueeze(1), gt_ys.unsqueeze(1), \ - gt_hs.unsqueeze(1), gt_ws.unsqueeze(1), gt_angles.unsqueeze(1) + np.pi/2.), 1) + gt_hs.unsqueeze(1), gt_ws.unsqueeze(1), gt_angles.unsqueeze(1) + pi/2.), 1) gt_rois_extent2 = jt.contrib.concat((gt_xs.unsqueeze(1), gt_ys.unsqueeze(1), \ - gt_ws.unsqueeze(1), gt_hs.unsqueeze(1), gt_angles.unsqueeze(1) + np.pi), 1) + gt_ws.unsqueeze(1), gt_hs.unsqueeze(1), gt_angles.unsqueeze(1) + pi), 1) gt_rois_extent3 = jt.contrib.concat((gt_xs.unsqueeze(1), gt_ys.unsqueeze(1), \ - gt_hs.unsqueeze(1), gt_ws.unsqueeze(1), gt_angles.unsqueeze(1) + np.pi * 3/2.), 1) + gt_hs.unsqueeze(1), gt_ws.unsqueeze(1), gt_angles.unsqueeze(1) + pi * 3/2.), 1) gt_rois_extent = jt.contrib.concat((gt_rois_extent0.unsqueeze(1), gt_rois_extent1.unsqueeze(1), gt_rois_extent2.unsqueeze(1), @@ -261,7 +263,7 @@ def choose_best_match_batch(Rrois, gt_rois): for curiter, index in enumerate(min_index): gt_rois_new[curiter, :] = gt_rois_extent[curiter, index.item(), :] - gt_rois_new[:, 4] = gt_rois_new[:, 4] % (2 * 
np.pi) + gt_rois_new[:, 4] = gt_rois_new[:, 4] % (2 * pi) return gt_rois_new @@ -310,9 +312,9 @@ def delta2dbbox_v3(Rrois, gh = Rroi_h * dh.exp() # TODO: check the hard code - # gangle = (2 * np.pi) * dangle + Rroi_angle + # gangle = (2 * pi) * dangle + Rroi_angle gangle = dangle + Rroi_angle - # gangle = gangle % ( 2 * np.pi) + # gangle = gangle % ( 2 * pi) if max_shape is not None: pass @@ -351,7 +353,7 @@ def delta2dbbox_v2(Rrois, gw = Rroi_w * dw.exp() gh = Rroi_h * dh.exp() - gangle = (np.pi / 2.) * dangle + Rroi_angle + gangle = (pi / 2.) * dangle + Rroi_angle if max_shape is not None: pass @@ -456,9 +458,9 @@ def choose_best_Rroi_batch(Rroi): Rroi[indexes, 2] = h[indexes] Rroi[indexes, 3] = w[indexes] - Rroi[indexes, 4] = Rroi[indexes, 4] + np.pi / 2. + Rroi[indexes, 4] = Rroi[indexes, 4] + pi / 2. # TODO: check the module - Rroi[:, 4] = Rroi[:, 4] % np.pi + Rroi[:, 4] = Rroi[:, 4] % pi return Rroi @@ -496,9 +498,9 @@ def dbbox2roi(dbbox_list): drois = jt.contrib.concat(drois_list, 0) return drois -def regular_theta(theta, mode='180', start=-np.pi/2): +def regular_theta(theta, mode='180', start=-pi/2): assert mode in ['360', '180'] - cycle = 2 * np.pi if mode == '360' else np.pi + cycle = 2 * pi if mode == '360' else pi theta = theta - start theta = theta % cycle @@ -509,10 +511,10 @@ def regular_obb(obboxes): w_regular = w * (w > h) + h * (1 - (w > h)) h_regular = h * (w > h) + w * (1 - (w > h)) - theta_regular = theta * (w > h) + (theta + np.pi / 2) * (1 - (w > h)) + theta_regular = theta * (w > h) + (theta + pi / 2) * (1 - (w > h)) # w_regular = jt.where(w > h, w, h) # h_regular = jt.where(w > h, h, w) - # theta_regular = jt.where(w > h, theta, theta + np.pi/2) + # theta_regular = jt.where(w > h, theta, theta + pi/2) theta_regular = regular_theta(theta_regular) return jt.stack([x, y, w_regular, h_regular, theta_regular], dim=-1) @@ -561,7 +563,7 @@ def poly2obb(polys): else: w, h = h, w angle = -90 - angle - theta = angle / 180 * np.pi + theta = angle / 
# Patch text that shares these lines but belongs to the preceding
# bbox_transforms.py hunks and to this file's patch header, kept verbatim:
#   180 * pi obboxes.append([x, y, w, h, theta]) if not obboxes:
#   @@ -643,7 +645,7 @@ def hbb2obb(hbboxes): theta = jt.zeros_like(x)
#   obboxes1 = jt.stack([x, y, w, h, theta], dim=-1)
#   - obboxes2 = jt.stack([x, y, h, w, theta-np.pi/2], dim=-1)
#   + obboxes2 = jt.stack([x, y, h, w, theta-pi/2], dim=-1)
#   flag = (w >= h)[..., None] obboxes = flag * obboxes1 + (1 - flag) * obboxes2
#   diff --git a/python/jdet/ops/bbox_transforms_numpy.py b/python/jdet/ops/bbox_transforms_numpy.py
#   new file mode 100644 index 0000000..fe2276a --- /dev/null
#   +++ b/python/jdet/ops/bbox_transforms_numpy.py @@ -0,0 +1,192 @@
try:
    import cv2
except ImportError:
    # Only poly2obb needs OpenCV; the other helpers are pure numpy and stay
    # importable without it.
    cv2 = None
import numpy as np

pi = np.pi


def poly2obb(polys):
    """Convert polygons to oriented boxes ``(x, y, w, h, theta)``.

    Each polygon is fitted with ``cv2.minAreaRect``. The sides are swapped
    when needed so that ``w >= h``, and the angle is converted from degrees
    to radians. Leading dimensions of ``polys`` are preserved.

    Raises:
        ImportError: if OpenCV is not installed.
    """
    if cv2 is None:
        raise ImportError('poly2obb requires OpenCV (cv2) to be installed')
    # [-90 90)
    order = polys.shape[:-1]
    num_points = polys.shape[-1] // 2
    polys = polys.reshape(-1, num_points, 2)
    polys = polys.astype(np.float32)

    obboxes = []
    for poly in polys:
        (x, y), (w, h), angle = cv2.minAreaRect(poly)
        if w >= h:
            angle = -angle
        else:
            w, h = h, w
            angle = -90 - angle
        theta = angle / 180 * pi
        obboxes.append([x, y, w, h, theta])

    if not obboxes:
        obboxes = np.zeros((0, 5), dtype=np.float32)
    else:
        obboxes = np.array(obboxes, dtype=np.float32)
    return obboxes.reshape(*order, 5)


def rectpoly2obb(polys):
    """Convert rectangular 4-point polygons to oriented boxes.

    The angle is taken from the edge between the first two points (with the
    y difference negated). The corners are then rotated about their mean
    into the box frame, where the extents give ``w`` and ``h``. The result
    is passed through ``regular_obb`` so that ``w >= h``, as in ``poly2obb``
    and ``hbb2obb``.
    """
    theta = np.arctan2(-(polys[..., 3] - polys[..., 1]),
                       polys[..., 2] - polys[..., 0])
    cos, sin = np.cos(theta), np.sin(theta)
    rotation = np.stack([cos, -sin, sin, cos], axis=-1)
    rotation = rotation.reshape(*rotation.shape[:-1], 2, 2)

    x = polys[..., 0::2].mean(-1)
    y = polys[..., 1::2].mean(-1)
    # ndarray has no expand_dims method; the module-level function is needed
    # to add the per-point axis that broadcasts against the four corners.
    center = np.expand_dims(np.stack([x, y], axis=-1), -2)
    center_polys = polys.reshape(*polys.shape[:-1], 4, 2) - center
    rotate_polys = np.matmul(center_polys, rotation.swapaxes(-1, -2))

    xmin = np.min(rotate_polys[..., :, 0], axis=-1)
    xmax = np.max(rotate_polys[..., :, 0], axis=-1)
    ymin = np.min(rotate_polys[..., :, 1], axis=-1)
    ymax = np.max(rotate_polys[..., :, 1], axis=-1)
    w = xmax - xmin
    h = ymax - ymin

    obboxes = np.stack([x, y, w, h, theta], axis=-1)
    return regular_obb(obboxes)


def poly2hbb(polys):
    """Return the axis-aligned bounds ``(xmin, ymin, xmax, ymax)`` of polygons."""
    shape = polys.shape
    polys = polys.reshape(*shape[:-1], shape[-1] // 2, 2)
    lt_point = np.min(polys, axis=-2)
    rb_point = np.max(polys, axis=-2)
    return np.concatenate([lt_point, rb_point], axis=-1)


def obb2poly(obboxes):
    """Expand oriented boxes ``(x, y, w, h, theta)`` into four corner points."""
    center, w, h, theta = np.split(obboxes, (2, 3, 4), axis=-1)
    cos, sin = np.cos(theta), np.sin(theta)

    # Half-extent vectors along the box's width and height directions.
    vector1 = np.concatenate([w / 2 * cos, -w / 2 * sin], axis=-1)
    vector2 = np.concatenate([-h / 2 * sin, -h / 2 * cos], axis=-1)

    point1 = center + vector1 + vector2
    point2 = center + vector1 - vector2
    point3 = center - vector1 - vector2
    point4 = center - vector1 + vector2
    return np.concatenate([point1, point2, point3, point4], axis=-1)


def obb2hbb(obboxes):
    """Return the axis-aligned bounds of oriented boxes."""
    center, w, h, theta = np.split(obboxes, (2, 3, 4), axis=-1)
    cos, sin = np.cos(theta), np.sin(theta)
    x_bias = np.abs(w / 2 * cos) + np.abs(h / 2 * sin)
    y_bias = np.abs(w / 2 * sin) + np.abs(h / 2 * cos)
    bias = np.concatenate([x_bias, y_bias], axis=-1)
    return np.concatenate([center - bias, center + bias], axis=-1)


def hbb2poly(hbboxes):
    """Expand ``(l, t, r, b)`` boxes into corners (l,t), (r,t), (r,b), (l,b)."""
    l, t, r, b = [hbboxes[..., i] for i in range(4)]
    return np.stack([l, t, r, t, r, b, l, b], axis=-1)


def hbb2obb(hbboxes):
    """Convert ``(l, t, r, b)`` boxes to oriented boxes with ``w >= h``.

    Boxes that are taller than wide get their sides swapped and an angle of
    ``-pi/2``; all others keep angle 0.
    """
    order = hbboxes.shape[:-1]
    x = (hbboxes[..., 0] + hbboxes[..., 2]) * 0.5
    y = (hbboxes[..., 1] + hbboxes[..., 3]) * 0.5
    w = hbboxes[..., 2] - hbboxes[..., 0]
    h = hbboxes[..., 3] - hbboxes[..., 1]

    theta = np.zeros(order, dtype=np.float32)
    obboxes1 = np.stack([x, y, w, h, theta], axis=-1)
    obboxes2 = np.stack([x, y, h, w, theta - pi / 2], axis=-1)
    obboxes = np.where((w >= h)[..., None], obboxes1, obboxes2)
    return obboxes


# Conversion routine for every (source type, target type) pair.
_type_func_map = {
    ('poly', 'obb'): poly2obb,
    ('poly', 'hbb'): poly2hbb,
    ('obb', 'poly'): obb2poly,
    ('obb', 'hbb'): obb2hbb,
    ('hbb', 'poly'): hbb2poly,
    ('hbb', 'obb'): hbb2obb
}


def bbox2type(bboxes, to_type):
    """Convert ``bboxes`` to ``to_type`` ('hbb', 'obb' or 'poly').

    The input is returned unchanged when it already has the requested type.

    Raises:
        ValueError: if the last dimension does not match a known box type.
    """
    assert to_type in ['hbb', 'obb', 'poly']

    ori_type = get_bbox_type(bboxes)
    if ori_type == 'notype':
        raise ValueError('Not a bbox type')
    if ori_type == to_type:
        return bboxes
    trans_func = _type_func_map[(ori_type, to_type)]
    return trans_func(bboxes)


def get_bbox_type(bboxes, with_score=False):
    """Classify boxes by last-axis size: 4 'hbb', 5 'obb', 8 'poly'.

    With ``with_score`` one trailing column is discounted first. Any other
    size yields 'notype'.
    """
    dim = bboxes.shape[-1]
    if with_score:
        dim -= 1

    if dim == 4:
        return 'hbb'
    if dim == 5:
        return 'obb'
    if dim == 8:
        return 'poly'
    return 'notype'


def get_bbox_dim(bbox_type, with_score=False):
    """Return the last-axis size of ``bbox_type`` (plus one with a score).

    Raises:
        ValueError: if ``bbox_type`` is not 'hbb', 'obb' or 'poly'.
    """
    if bbox_type == 'hbb':
        dim = 4
    elif bbox_type == 'obb':
        dim = 5
    elif bbox_type == 'poly':
        dim = 8
    else:
        raise ValueError(f"don't know {bbox_type} bbox dim")

    if with_score:
        dim += 1
    return dim


def choice_by_type(hbb_op, obb_op, poly_op, bboxes_or_type,
                   with_score=False):
    """Pick one of three values according to a box type.

    ``bboxes_or_type`` is either an array (its type is detected with
    ``get_bbox_type``) or a type name given as a string.

    Raises:
        TypeError: if ``bboxes_or_type`` is neither an ndarray nor a str.
        ValueError: if the resolved type is not 'hbb', 'obb' or 'poly'.
    """
    if isinstance(bboxes_or_type, np.ndarray):
        bbox_type = get_bbox_type(bboxes_or_type, with_score)
    elif isinstance(bboxes_or_type, str):
        bbox_type = bboxes_or_type
    else:
        # One message string: two positional arguments would make the
        # exception text render as a tuple.
        raise TypeError('need np.ndarray or str, '
                        f'but get {type(bboxes_or_type)}')

    if bbox_type == 'hbb':
        return hbb_op
    elif bbox_type == 'obb':
        return obb_op
    elif bbox_type == 'poly':
        return poly_op
    else:
        raise ValueError('notype bboxes is not suppert')


def regular_theta(theta, mode='180', start=-pi/2):
    """Wrap ``theta`` into ``[start, start + cycle)``.

    ``cycle`` is ``pi`` for mode '180' and ``2 * pi`` for mode '360'.
    """
    assert mode in ['360', '180']
    cycle = 2 * pi if mode == '360' else pi

    theta = theta - start
    theta = theta % cycle
    return theta + start


def regular_obb(obboxes):
    """Normalise oriented boxes so that ``w >= h`` with a wrapped angle.

    Where ``w <= h`` the sides are swapped and ``pi/2`` is added to the
    angle; the angle is then wrapped with ``regular_theta`` defaults.
    """
    x, y, w, h, theta = [obboxes[..., i] for i in range(5)]
    w_regular = np.where(w > h, w, h)
    h_regular = np.where(w > h, h, w)
    theta_regular = np.where(w > h, theta, theta + pi / 2)
    theta_regular = regular_theta(theta_regular)
    return np.stack([x, y, w_regular, h_regular, theta_regular], axis=-1)