Add YOLOX config (open-mmlab#5808)

hhaAndroid · bognabylicka · commit af2d508fd816 · 2022-01-04T14:47:23.000+01:00
* Add YOLOX config * update * fix error * fix lr error * fix tiny config error and foreground_mask warning * fix dp train error * add comment * support browse_dataset * add comment * fix __repr__ * Switch to synchronizing norm interval. * Add README and metafile * update README * update doc * rename * revert * update (cherry picked from commit 2bdb167)
diff --git a/configs/yolox/README.md b/configs/yolox/README.md
@@ -0,0 +1,25 @@
+# YOLOX: Exceeding YOLO Series in 2021
+
+## Introduction
+
+<!-- [ALGORITHM] -->
+
+```latex
+@article{yolox2021,
+  title={{YOLOX}: Exceeding YOLO Series in 2021},
+  author={Ge, Zheng and Liu, Songtao and Wang, Feng and Li, Zeming and Sun, Jian},
+  journal={arXiv preprint arXiv:2107.08430},
+  year={2021}
+}
+```
+
+## Results and Models
+
+| Backbone  | size   | Mem (GB) |   box AP | Config | Download |
+|:---------:|:-------:|:-------:|:-------:|:--------:|:------:|
+| YOLOX-Tiny | 416 |   3.6      |   31.6  | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox/yolox_tiny_8x8_300e_coco.py)       |[model](https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20210806_234250-4ff3b67e.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20210806_234250.log.json) |
+
+**Note**:
+
+1. The test score threshold is 0.001.
+2. We find that the performance is unstable and may fluctuate by about 0.7 mAP. We will continue to investigate and improve it.
diff --git a/configs/yolox/metafile.yml b/configs/yolox/metafile.yml
@@ -0,0 +1,28 @@
+Collections:
+  - Name: YOLOX
+    Metadata:
+      Training Data: COCO
+      Training Techniques:
+        - SGD with Nesterov
+        - Weight Decay
+        - Cosine Annealing Lr Updater
+      Training Resources: 8x TITANXp GPUs
+      Architecture:
+        - CSPDarkNet
+        - PAFPN
+    Paper: https://arxiv.org/abs/2107.08430
+    README: configs/yolox/README.md
+
+Models:
+  - Name: yolox_tiny_8x8_300e_coco
+    In Collection: YOLOX
+    Config: configs/yolox/yolox_tiny_8x8_300e_coco.py
+    Metadata:
+      Training Memory (GB): 3.6
+      Epochs: 300
+    Results:
+      - Task: Object Detection
+        Dataset: COCO
+        Metrics:
+          box AP: 31.6
+    Weights: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20210806_234250-4ff3b67e.pth
diff --git a/configs/yolox/yolox_l_8x8_300e_coco.py b/configs/yolox/yolox_l_8x8_300e_coco.py
@@ -0,0 +1,8 @@
+_base_ = './yolox_s_8x8_300e_coco.py'
+
+# model settings: YOLOX-L values set on top of the `_base_` (YOLOX-S) config
+model = dict(
+    backbone=dict(deepen_factor=1.0, widen_factor=1.0),
+    neck=dict(
+        in_channels=[256, 512, 1024], out_channels=256, num_csp_blocks=3),
+    bbox_head=dict(in_channels=256, feat_channels=256))
diff --git a/configs/yolox/yolox_m_8x8_300e_coco.py b/configs/yolox/yolox_m_8x8_300e_coco.py
@@ -0,0 +1,8 @@
+_base_ = './yolox_s_8x8_300e_coco.py'
+
+# model settings: YOLOX-M values set on top of the `_base_` (YOLOX-S) config
+model = dict(
+    backbone=dict(deepen_factor=0.67, widen_factor=0.75),
+    neck=dict(in_channels=[192, 384, 768], out_channels=192, num_csp_blocks=2),
+    bbox_head=dict(in_channels=192, feat_channels=192),
+)
diff --git a/configs/yolox/yolox_nano_8x8_300e_coco.py b/configs/yolox/yolox_nano_8x8_300e_coco.py
@@ -0,0 +1,11 @@
+_base_ = './yolox_tiny_8x8_300e_coco.py'
+
+# model settings: YOLOX-Nano values set on top of `_base_` (YOLOX-Tiny)
+model = dict(
+    backbone=dict(deepen_factor=0.33, widen_factor=0.25, use_depthwise=True),
+    neck=dict(
+        in_channels=[64, 128, 256],
+        out_channels=64,
+        num_csp_blocks=1,
+        use_depthwise=True),
+    bbox_head=dict(in_channels=64, feat_channels=64, use_depthwise=True))
diff --git a/configs/yolox/yolox_s_8x8_300e_coco.py b/configs/yolox/yolox_s_8x8_300e_coco.py
@@ -0,0 +1,143 @@
+_base_ = ['../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py']
+
+# model settings (YOLOX-S; the other YOLOX configs use this file as `_base_`)
+model = dict(
+    type='YOLOX',
+    backbone=dict(type='CSPDarknet', deepen_factor=0.33, widen_factor=0.5),
+    neck=dict(
+        type='YOLOXPAFPN',
+        in_channels=[128, 256, 512],
+        out_channels=128,
+        num_csp_blocks=1),
+    bbox_head=dict(
+        type='YOLOXHead', num_classes=80, in_channels=128, feat_channels=128),
+    train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)),
+    # To align with the source code, the score threshold is 0.01 in the val
+    # phase and 0.001 in the test phase (only the val value is set here).
+    test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)))
+
+# dataset settings
+data_root = 'data/coco/'  # prefix of every ann_file/img_prefix below
+dataset_type = 'CocoDataset'
+
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+
+img_scale = (640, 640)  # reused by the pipelines, train_dataset and hooks
+
+train_pipeline = [  # loading steps live in train_dataset's inner pipeline
+    dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
+    dict(
+        type='RandomAffine',
+        scaling_ratio_range=(0.1, 2),
+        border=(-img_scale[0] // 2, -img_scale[1] // 2)),
+    dict(
+        type='MixUp',
+        img_scale=img_scale,
+        ratio_range=(0.8, 1.6),
+        pad_val=114.0),
+    dict(
+        type='PhotoMetricDistortion',
+        brightness_delta=32,
+        contrast_range=(0.5, 1.5),
+        saturation_range=(0.5, 1.5),
+        hue_delta=18),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='Resize', keep_ratio=True),
+    dict(type='Pad', pad_to_square=True, pad_val=114.0),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
+]
+
+train_dataset = dict(
+    type='MultiImageMixDataset',
+    dataset=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_train2017.json',
+        img_prefix=data_root + 'train2017/',
+        pipeline=[  # loading only; augmentation is in train_pipeline
+            dict(type='LoadImageFromFile', to_float32=True),
+            dict(type='LoadAnnotations', with_bbox=True)
+        ],
+        filter_empty_gt=False,
+    ),
+    pipeline=train_pipeline,
+    dynamic_scale=img_scale)
+
+test_pipeline = [  # shared by data.val and data.test below
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=img_scale,
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Pad', size=img_scale, pad_val=114.0),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='DefaultFormatBundle'),
+            dict(type='Collect', keys=['img'])
+        ])
+]
+
+data = dict(
+    samples_per_gpu=8,
+    workers_per_gpu=2,
+    train=train_dataset,
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        pipeline=test_pipeline),
+    test=dict(  # same val2017 annotations and images as `val`
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        pipeline=test_pipeline))
+
+# optimizer
+# default values for 8 GPUs
+optimizer = dict(
+    type='SGD',
+    lr=0.01,
+    momentum=0.9,
+    weight_decay=5e-4,
+    nesterov=True,
+    paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.))
+optimizer_config = dict(grad_clip=None)
+
+# learning policy
+lr_config = dict(
+    _delete_=True,
+    policy='YOLOX',
+    warmup='exp',
+    by_epoch=False,
+    warmup_by_epoch=True,
+    warmup_ratio=1,
+    warmup_iters=5,  # 5 epochs, since warmup_by_epoch=True
+    num_last_epochs=15,
+    min_lr_ratio=0.05)
+runner = dict(type='EpochBasedRunner', max_epochs=300)
+
+resume_from = None  # forwarded to ExpMomentumEMAHook below
+interval = 10  # shared by the hooks, checkpointing and evaluation below
+
+custom_hooks = [
+    dict(type='YOLOXModeSwitchHook', num_last_epochs=15, priority=48),
+    dict(
+        type='SyncRandomSizeHook',
+        ratio_range=(14, 26),
+        img_scale=img_scale,
+        interval=interval,
+        priority=48),
+    dict(
+        type='SyncNormHook',
+        num_last_epochs=15,
+        interval=interval,
+        priority=48),
+    dict(type='ExpMomentumEMAHook', resume_from=resume_from, priority=49)
+]
+checkpoint_config = dict(interval=interval)
+evaluation = dict(interval=interval, metric='bbox')
+log_config = dict(interval=50)
diff --git a/configs/yolox/yolox_tiny_8x8_300e_coco.py b/configs/yolox/yolox_tiny_8x8_300e_coco.py
@@ -0,0 +1,79 @@
+_base_ = './yolox_s_8x8_300e_coco.py'
+
+# model settings: YOLOX-Tiny values set on top of `_base_` (YOLOX-S)
+model = dict(
+    backbone=dict(deepen_factor=0.33, widen_factor=0.375),
+    neck=dict(in_channels=[96, 192, 384], out_channels=96),
+    bbox_head=dict(in_channels=96, feat_channels=96))
+
+# dataset settings
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+
+img_scale = (640, 640)  # training scale; the test pipeline uses (416, 416)
+
+train_pipeline = [  # differs from YOLOX-S: no MixUp, narrower scaling range
+    dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
+    dict(
+        type='RandomAffine',
+        scaling_ratio_range=(0.5, 1.5),
+        border=(-img_scale[0] // 2, -img_scale[1] // 2)),
+    dict(
+        type='PhotoMetricDistortion',
+        brightness_delta=32,
+        contrast_range=(0.5, 1.5),
+        saturation_range=(0.5, 1.5),
+        hue_delta=18),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='Resize', keep_ratio=True),
+    dict(type='Pad', pad_to_square=True, pad_val=114.0),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
+]
+
+test_pipeline = [  # evaluates at 416x416 instead of img_scale
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(416, 416),
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Pad', size=(416, 416), pad_val=114.0),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='DefaultFormatBundle'),
+            dict(type='Collect', keys=['img'])
+        ])
+]
+
+train_dataset = dict(pipeline=train_pipeline)  # sets the pipeline only
+
+data = dict(
+    train=train_dataset,
+    val=dict(pipeline=test_pipeline),
+    test=dict(pipeline=test_pipeline))
+
+resume_from = None  # forwarded to ExpMomentumEMAHook below
+interval = 10  # shared by the hooks, checkpointing and evaluation below
+
+# Hooks with the same priority execute in the order of insertion.
+# The smaller the value, the higher the priority.
+custom_hooks = [
+    dict(type='YOLOXModeSwitchHook', num_last_epochs=15, priority=48),
+    dict(
+        type='SyncRandomSizeHook',
+        ratio_range=(10, 20),
+        img_scale=img_scale,
+        interval=interval,
+        priority=48),
+    dict(
+        type='SyncNormHook',
+        num_last_epochs=15,
+        interval=interval,
+        priority=48),
+    dict(type='ExpMomentumEMAHook', resume_from=resume_from, priority=49)
+]
+checkpoint_config = dict(interval=interval)
+evaluation = dict(interval=interval, metric='bbox')
diff --git a/configs/yolox/yolox_x_8x8_300e_coco.py b/configs/yolox/yolox_x_8x8_300e_coco.py
@@ -0,0 +1,8 @@
+_base_ = './yolox_s_8x8_300e_coco.py'
+
+# model settings: YOLOX-X values set on top of the `_base_` (YOLOX-S) config
+model = dict(
+    backbone=dict(deepen_factor=1.33, widen_factor=1.25),
+    neck=dict(
+        in_channels=[320, 640, 1280], out_channels=320, num_csp_blocks=4),
+    bbox_head=dict(in_channels=320, feat_channels=320))
diff --git a/docs/model_zoo.md b/docs/model_zoo.md
@@ -186,6 +186,10 @@ Please refer to [ResNeSt](https://github.com/open-mmlab/mmdetection/blob/master/
 
 Please refer to [DETR](https://github.com/open-mmlab/mmdetection/blob/master/configs/detr) for details.
 
+### YOLOX
+
+Please refer to [YOLOX](https://github.com/open-mmlab/mmdetection/blob/master/configs/yolox) for details.
+
 ### Other datasets
 
 We also benchmark some methods on [PASCAL VOC](https://github.com/open-mmlab/mmdetection/blob/master/configs/pascal_voc), [Cityscapes](https://github.com/open-mmlab/mmdetection/blob/master/configs/cityscapes) and [WIDER FACE](https://github.com/open-mmlab/mmdetection/blob/master/configs/wider_face).
diff --git a/model-index.yml b/model-index.yml
@@ -0,0 +1,58 @@
+Import:
+  - configs/atss/metafile.yml
+  - configs/autoassign/metafile.yml
+  - configs/cascade_rcnn/metafile.yml
+  - configs/centernet/metafile.yml
+  - configs/centripetalnet/metafile.yml
+  - configs/cornernet/metafile.yml
+  - configs/dcn/metafile.yml
+  - configs/deformable_detr/metafile.yml
+  - configs/detectors/metafile.yml
+  - configs/detr/metafile.yml
+  - configs/double_heads/metafile.yml
+  - configs/dynamic_rcnn/metafile.yml
+  - configs/empirical_attention/metafile.yml
+  - configs/faster_rcnn/metafile.yml
+  - configs/fcos/metafile.yml
+  - configs/foveabox/metafile.yml
+  - configs/fp16/metafile.yml
+  - configs/fpg/metafile.yml
+  - configs/free_anchor/metafile.yml
+  - configs/fsaf/metafile.yml
+  - configs/gcnet/metafile.yml
+  - configs/gfl/metafile.yml
+  - configs/ghm/metafile.yml
+  - configs/gn/metafile.yml
+  - configs/gn+ws/metafile.yml
+  - configs/grid_rcnn/metafile.yml
+  - configs/groie/metafile.yml
+  - configs/guided_anchoring/metafile.yml
+  - configs/hrnet/metafile.yml
+  - configs/htc/metafile.yml
+  - configs/instaboost/metafile.yml
+  - configs/ld/metafile.yml
+  - configs/libra_rcnn/metafile.yml
+  - configs/mask_rcnn/metafile.yml
+  - configs/ms_rcnn/metafile.yml
+  - configs/nas_fcos/metafile.yml
+  - configs/nas_fpn/metafile.yml
+  - configs/paa/metafile.yml
+  - configs/pafpn/metafile.yml
+  - configs/pisa/metafile.yml
+  - configs/point_rend/metafile.yml
+  - configs/regnet/metafile.yml
+  - configs/reppoints/metafile.yml
+  - configs/res2net/metafile.yml
+  - configs/resnest/metafile.yml
+  - configs/retinanet/metafile.yml
+  - configs/sabl/metafile.yml
+  - configs/scnet/metafile.yml
+  - configs/scratch/metafile.yml
+  - configs/sparse_rcnn/metafile.yml
+  - configs/ssd/metafile.yml
+  - configs/tridentnet/metafile.yml
+  - configs/vfnet/metafile.yml
+  - configs/yolact/metafile.yml
+  - configs/yolo/metafile.yml
+  - configs/yolof/metafile.yml
+  - configs/yolox/metafile.yml