diff --git a/requirements.txt b/requirements.txt index 548611e..77ee8bc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,6 @@ -torch==2.6.* -torchvision==0.21.* -mmcv==2.1.* -# Update CUDA Version if necessary. -# See: https://mmcv.readthedocs.io/en/latest/get_started/installation.html#install-with-pip --f https://download.openmmlab.com/mmcv/dist/cu117/torch2.0/index.html -mmdet==3.2.* +torch==2.9.* +torchvision==0.24.* +pycocotools albumentations scikit-learn scikit-image diff --git a/src/Jobs/DetectionJob.php b/src/Jobs/DetectionJob.php index 019071a..c069b0c 100644 --- a/src/Jobs/DetectionJob.php +++ b/src/Jobs/DetectionJob.php @@ -129,13 +129,13 @@ protected function getTmpDirPath() * * @return string */ - protected function python($command, $log = 'log.txt') + protected function python($command, $log = 'log.txt', $env = '') { $code = 0; $lines = []; $python = config('maia.python'); $logFile = "{$this->tmpDir}/{$log}"; - exec("{$python} -u {$command} >{$logFile} 2>&1", $lines, $code); + exec("{$env} {$python} -u {$command} >{$logFile} 2>&1", $lines, $code); if ($code !== 0) { $lines = File::get($logFile); diff --git a/src/Jobs/ObjectDetection.php b/src/Jobs/ObjectDetection.php index 12a7a3f..c182403 100644 --- a/src/Jobs/ObjectDetection.php +++ b/src/Jobs/ObjectDetection.php @@ -156,7 +156,7 @@ protected function generateDataset($images, $paths) $imagesMap = $this->buildImagesMap($images, $paths); $inputPath = $this->createDatasetJson($imagesMap, $outputPath); - $script = config('maia.mmdet_dataset_script'); + $script = config('maia.dataset_script'); $this->python("{$script} {$inputPath}", 'dataset-log.txt'); return $outputPath; @@ -200,34 +200,15 @@ protected function createDatasetJson($imagesMap, $outputJsonPath) protected function performTraining($datasetOutputPath) { $outputPath = "{$this->tmpDir}/output-training.json"; - $this->maybeDownloadWeights(config('maia.backbone_model_url'), config('maia.backbone_model_path')); - $this->maybeDownloadWeights(config('maia.model_url'), config('maia.model_path')); $inputPath = $this->createTrainingJson($outputPath); - $script = config('maia.mmdet_training_script'); - $this->python("{$script} {$inputPath} {$datasetOutputPath}", 'training-log.txt'); + $script = config('maia.training_script'); + $tmpDir = config('maia.tmp_dir'); + $env = "TORCH_HOME=$tmpDir"; + $this->python("{$script} {$inputPath} {$datasetOutputPath}", 'training-log.txt', $env); return $outputPath; } - /** - * Downloads the model pretrained weights if they weren't downloaded yet. - * - * @param string $from - * @param string $to - * - */ - protected function maybeDownloadWeights($from, $to) - { - if (!File::exists($to)) { - $this->ensureDirectory(dirname($to)); - $success = @copy($from, $to); - - if (!$success) { - throw new Exception("Failed to download model weights from '{$from}'."); - } - } - } - /** * Create the JSON file that is the input to the training script. 
* @@ -242,10 +223,7 @@ protected function createTrainingJson($outputJsonPath) 'tmp_dir' => $this->tmpDir, 'max_workers' => intval(config('maia.max_workers')), 'output_path' => $outputJsonPath, - 'base_config' => config('maia.mmdet_base_config'), - 'batch_size' => config('maia.mmdet_train_batch_size'), - 'backbone_model_path' => config('maia.backbone_model_path'), - 'model_path' => config('maia.model_path'), + 'batch_size' => config('maia.train_batch_size'), ]; File::put($path, json_encode($content, JSON_UNESCAPED_SLASHES)); @@ -265,8 +243,10 @@ protected function performInference($images, $datasetOutputPath, $trainingOutput FileCache::batch($images, function ($images, $paths) use ($datasetOutputPath, $trainingOutputPath) { $imagesMap = $this->buildImagesMap($images, $paths); $inputPath = $this->createInferenceJson($imagesMap); - $script = config('maia.mmdet_inference_script'); - $this->python("{$script} {$inputPath} {$datasetOutputPath} {$trainingOutputPath}", 'inference-log.txt'); + $script = config('maia.inference_script'); + $tmpDir = config('maia.tmp_dir'); + $env = "TORCH_HOME=$tmpDir"; + $this->python("{$script} {$inputPath} {$trainingOutputPath}", 'inference-log.txt', $env); }); } diff --git a/src/config/maia.php b/src/config/maia.php index 7666b9f..1790780 100644 --- a/src/config/maia.php +++ b/src/config/maia.php @@ -72,50 +72,25 @@ /* | Path to the script that generates the training dataset for MMDetection. */ - 'mmdet_dataset_script' => __DIR__.'/../resources/scripts/object-detection/DatasetGenerator.py', + 'dataset_script' => __DIR__.'/../resources/scripts/object-detection/DatasetGenerator.py', /* | Path to the script that trains the MMDetection model. */ - 'mmdet_training_script' => __DIR__.'/../resources/scripts/object-detection/TrainingRunner.py', + 'training_script' => __DIR__.'/../resources/scripts/object-detection/TrainingRunner.py', /* | Path to the script that performs inference with the trained MMDetection model. */ - 'mmdet_inference_script' => __DIR__.'/../resources/scripts/object-detection/InferenceRunner.py', + 'inference_script' => __DIR__.'/../resources/scripts/object-detection/InferenceRunner.py', /* - | Path to the MMDetection base config file. - */ - 'mmdet_base_config' => __DIR__.'/../resources/scripts/object-detection/faster_rcnn_r50_fpn_1x.py', - - /* - | URL from which to download the pretrained weights for the model backbone. - */ - 'backbone_model_url' => env('MAIA_BACKBONE_MODEL_URL', 'https://download.pytorch.org/models/resnet50-11ad3fa6.pth'), - - /* - | URL from which to download the trained weights for the model. - */ - 'model_url' => env('MAIA_MODEL_URL', 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'), - - /* - | Path to the file to store the pretrained backbone weights to. - */ - 'backbone_model_path' => storage_path('maia_jobs').'/resnet50-11ad3fa6.pth', - - /* - | Path to the file to store the pretrained model weights to. - */ - 'model_path' => storage_path('maia_jobs').'/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth', - - /* - | Number of 512x512 px images in a training batch of MMDetection. + | Number of 512x512 px images in one training batch. | This can be increased with larger GPU memory to achieve faster training. | | Default is 16. 
*/ - 'mmdet_train_batch_size' => env('MAIA_MMDET_TRAIN_BATCH_SIZE', 16), + 'train_batch_size' => env('MAIA_TRAIN_BATCH_SIZE', 16), 'notifications' => [ diff --git a/src/resources/scripts/object-detection/DatasetGenerator.py b/src/resources/scripts/object-detection/DatasetGenerator.py index e277dfc..d848f52 100644 --- a/src/resources/scripts/object-detection/DatasetGenerator.py +++ b/src/resources/scripts/object-detection/DatasetGenerator.py @@ -64,7 +64,7 @@ def generate(self): 'images': image_list, 'annotations': annotation_list, 'categories': [{ - 'id': 0, + 'id': 1, 'name': 'interesting', 'supercategory': 'interesting', }], @@ -84,6 +84,14 @@ def process_image(self, imageId, proposals): scale_factor = self.scale_factors[imageId] width = int(round(image.width * scale_factor)) height = int(round(image.height * scale_factor)) + # Make sure the scaling does not increase the image size too much + # (e.g. if there is an error in the metadata). Use a maximum of + # 15,000x15,000 pixels. + too_large_factor = min(15000 / width, 15000 / height) + if too_large_factor < 1.0: + width = int(round(width * too_large_factor)) + height = int(round(height * too_large_factor)) + scale_factor = scale_factor * too_large_factor image_format = image.format image = image.resize((width, height)) proposals = np.round(np.array(proposals, dtype=np.float32) * scale_factor) @@ -102,7 +110,7 @@ def process_image(self, imageId, proposals): annotations.append({ 'id': 0, # Placeholder, will be updated to an uniwue ID later. 'image_id': int(imageId), - 'category_id': 0, # There is only one category. + 'category_id': 1, # There is only one category. 'bbox': [ int(p[0] - p[2]), # px int(p[1] - p[2]), # py diff --git a/src/resources/scripts/object-detection/InferenceRunner.py b/src/resources/scripts/object-detection/InferenceRunner.py index 288c094..09d00e8 100644 --- a/src/resources/scripts/object-detection/InferenceRunner.py +++ b/src/resources/scripts/object-detection/InferenceRunner.py @@ -1,65 +1,73 @@ import sys import os import json -from torch import cuda -from torch.serialization import safe_globals from concurrent.futures import ThreadPoolExecutor -from mmdet.apis import init_detector, inference_detector -from mmengine.logging.history_buffer import HistoryBuffer -from numpy.core.multiarray import _reconstruct -from numpy import ndarray -from numpy import dtype -from numpy.core.multiarray import scalar + +import torch +from PIL import Image +import albumentations as A +import numpy as np +from torch_utils import get_model +from albumentations.pytorch import ToTensorV2 class InferenceRunner(object): - def __init__(self, params, trainset): + def __init__(self, params): # Path to the directory to store temporary files. self.tmp_dir = params['tmp_dir'] # Path to the trained model to use for inference. self.checkpoint_path = params['checkpoint_path'] - # Path to the MMDetection config. - self.config_path = params['config_path'] # We need at least one worker thread here. 
self.max_workers = max(int(params['max_workers']), 1) - self.cfg_options = { - 'gpu_ids': [0], - } + self.num_classes = params['num_classes'] self.images = {k: v for k, v in params['images'].items()} + self.transforms = A.Compose([ + A.ToFloat(), + ToTensorV2(), + ]) + def run(self): - device = 'cuda:0' if cuda.is_available() else 'cpu' - custom_safe_globals = [ - HistoryBuffer, - _reconstruct, - ndarray, - dtype, - type(dtype('float64')), - type(dtype('int64')), - getattr, - scalar, - ] - - with safe_globals(custom_safe_globals): - model = init_detector(self.config_path, checkpoint=self.checkpoint_path, device=device, cfg_options=self.cfg_options) + model = get_model( + self.num_classes, + # The original config had rpn_nms_thresh=0.7 and box_nms_thresh=0.5. + # Lowered, because of many overlapping boxes for the same objects in tests. + rpn_nms_thresh=0.2, + box_nms_thresh=0.2, + # Increase default max_size of 1333. + max_size=8192, + # Use same min_size enforced by BIIGLE. + min_size=512, + ) + model.load_state_dict(torch.load(self.checkpoint_path)) + + model.eval() + + device = torch.accelerator.current_accelerator() if torch.accelerator.is_available() else torch.device('cpu') + model.to(device) executor = ThreadPoolExecutor(max_workers=self.max_workers) total_images = len(self.images) - for index, (image_id, image_path) in enumerate(self.images.items()): - print('Image {} of {} (#{})'.format(index + 1, total_images, image_id)) - result = inference_detector(model, image_path) - executor.submit(self.process_result, image_id, result.pred_instances) + with torch.inference_mode(): + for index, (image_id, image_path) in enumerate(self.images.items()): + print('Image {} of {} (#{})'.format(index + 1, total_images, image_id)) + image = np.array(Image.open(image_path).convert('RGB')) + image = self.transforms(image=image)['image'].to(device) + result = model([image])[0] + + executor.submit(self.process_result, image_id, result) # Wait for pending jobs of the postprocessing. 
executor.shutdown(True) def process_result(self, image_id, pred): points = [] - for bbox, score, label in zip(pred.bboxes, pred.scores, pred.labels): - x1, y1, x2, y2 = bbox.detach().cpu().numpy() + boxes = pred['boxes'].detach().cpu().numpy() + for bbox, score, label in zip(boxes, pred['scores'], pred['labels']): + x1, y1, x2, y2 = bbox r = round(max(x2 - x1, y2 - y1) / 2, 2) x = round((x1 + x2) / 2, 2) y = round((y1 + y2) / 2, 2) @@ -74,13 +82,10 @@ def process_result(self, image_id, pred): params = json.load(f) with open(sys.argv[2]) as f: - trainset = json.load(f) - - with open(sys.argv[3]) as f: train_params = json.load(f) params['checkpoint_path'] = train_params['checkpoint_path'] - params['config_path'] = train_params['config_path'] + params['num_classes'] = train_params['num_classes'] - runner = InferenceRunner(params, trainset) + runner = InferenceRunner(params) runner.run() diff --git a/src/resources/scripts/object-detection/TrainingRunner.py b/src/resources/scripts/object-detection/TrainingRunner.py index 5a806bf..5cf8ab6 100644 --- a/src/resources/scripts/object-detection/TrainingRunner.py +++ b/src/resources/scripts/object-detection/TrainingRunner.py @@ -1,98 +1,135 @@ +from albumentations.pytorch import ToTensorV2 +from torch.amp import GradScaler +from torch.utils.data import DataLoader +from torch_utils import collate_fn, train_one_epoch, get_model +from torchvision.datasets import CocoDetection +import albumentations as A +import json +import numpy as np import os import sys -import json - -import time - -from mmengine.config import Config -from mmengine.runner import Runner -from mmdet.utils import setup_cache_size_limit_of_dynamo - -class TrainingRunner(object): - - def __init__(self, params, trainset): - # Path to the directory to store temporary files. - self.tmp_dir = params['tmp_dir'] - - self.base_config = params['base_config'] - - self.dump_config_name = 'mmdet_config.py' - - self.cfg_options = { - # Path to store the logfiles and final checkpoint to. - 'work_dir': os.path.join(self.tmp_dir, 'work_dir'), - 'model': { - 'backbone': { - 'init_cfg': { - # Path to the pretrained weights for the backbone - 'checkpoint': params['backbone_model_path'], - }, - }, - }, - # Path to the pretrained weights for the rest of the network - 'load_from': params['model_path'], - 'train_dataloader': { - # If multi-GPU training is implemented at some point, divide this by the - # number of GPUs! - 'batch_size': int(params['batch_size']), - 'num_workers': int(params['max_workers']), - 'dataset': { - 'ann_file': trainset['ann_file'], - 'data_prefix': { - 'img': trainset['img_prefix'], - }, - }, - }, - 'test_dataloader': { - 'dataset': { - 'ann_file': trainset['ann_file'], - 'data_prefix': { - 'img': trainset['img_prefix'], - }, - }, - }, - 'test_evaluator': { - 'ann_file': trainset['ann_file'], - }, - 'classes': trainset['classes'], - 'gpu_ids': [0], - } - - # Based on: https://github.com/open-mmlab/mmdetection/blob/master/tools/train.py - def run(self): - # Reduce the number of repeated compilations and improve - # training speed. 
- setup_cache_size_limit_of_dynamo() - - # load config - cfg = Config.fromfile(self.base_config) - - cfg.merge_from_dict(self.cfg_options) - - if not os.path.exists(cfg.work_dir): - os.makedirs(cfg.work_dir) - - # dump config - cfg.dump(os.path.join(cfg.work_dir, self.dump_config_name)) - - runner = Runner.from_cfg(cfg) - runner.train() +import torch + +# Based on: https://docs.pytorch.org/tutorials/intermediate/torchvision_tutorial.html#torchvision-object-detection-finetuning-tutorial + +with open(sys.argv[1]) as f: + params = json.load(f) + +with open(sys.argv[2]) as f: + trainset = json.load(f) + +num_classes = len(trainset['classes']) +model = get_model( + num_classes, + # Use the same min_size as the random crop transform. + min_size=512, + # Train all but the first (of 5) layers. + trainable_backbone_layers=4, +) + +bbox_params = A.BboxParams( + format='pascal_voc', + label_fields=['labels'], + # Clip boxes that overflow image boundaries. + clip=True, + # Remove all boxes with less than 10% of their content in the random crop. + min_visibility=0.1, + filter_invalid_bboxes=True, +) +transforms = A.Compose([ + A.AtLeastOneBBoxRandomCrop(512, 512), + A.SomeOf([ + A.HorizontalFlip(p=0.25), + A.VerticalFlip(p=0.25), + A.RandomRotate90(p=0.25), + A.GaussianBlur(sigma_limit=[1.0, 2.0]), + A.ImageCompression(quality_range=[25, 50]), + ], n=4, p=0.25), + A.ToFloat(), + ToTensorV2(), +], bbox_params=bbox_params) + +# Custom dataset for compatibility with Albumentations. +class MyCocoDetection(CocoDetection): + def _load_target(self, id): + anns = self.coco.loadAnns(self.coco.getAnnIds(id)) + boxes = [a['bbox'] for a in anns] + boxes = [[b[0], b[1], b[0] + b[2], b[1] + b[3]] for b in boxes] + labels = [a['category_id'] for a in anns] return { - 'work_dir': cfg.work_dir, - 'checkpoint_path': os.path.join(cfg.work_dir, 'epoch_12.pth'), - 'config_path': os.path.join(cfg.work_dir, self.dump_config_name), + 'boxes': boxes, + 'labels': labels, } -if __name__ == '__main__': - with open(sys.argv[1]) as f: - params = json.load(f) - - with open(sys.argv[2]) as f: - trainset = json.load(f) - - runner = TrainingRunner(params, trainset) - output = runner.run() - - with open(params['output_path'], 'w') as f: - json.dump(output, f) + def __getitem__(self, index): + if not isinstance(index, int): + raise ValueError(f"Index must be of type integer, got {type(index)} instead.") + + id = self.ids[index] + image = self._load_image(id) + target = self._load_target(id) + + if self.transforms is not None: + ret = self.transforms( + image=np.array(image), + bboxes=np.array(target['boxes'], dtype=np.float32), + labels=target['labels'] + ) + image = ret['image'] + target['boxes'] = torch.tensor(ret['bboxes']) + target['labels'] = torch.tensor(ret['labels'], dtype=torch.int64) + + return image, target + +dataset = MyCocoDetection(root=trainset['img_prefix'], annFile=trainset['ann_file'], transforms=transforms) + +data_loader = DataLoader( + dataset, + batch_size=int(params['batch_size']), + shuffle=True, + num_workers=int(params['max_workers']), + persistent_workers=True, + pin_memory=True, + collate_fn=collate_fn +) + +# Scale the learning rate for different batch sizes. 
+base_batch_size = 16.0 +lr = 0.005 * float(params['batch_size']) / base_batch_size + +optim_params = [p for p in model.parameters() if p.requires_grad] +optimizer = torch.optim.SGD( + optim_params, + lr=lr, + momentum=0.9, + weight_decay=0.0005 +) + +lr_scheduler = torch.optim.lr_scheduler.MultiStepLR( + optimizer, + milestones=[8, 11], + gamma=0.1 +) + +device = torch.accelerator.current_accelerator() if torch.accelerator.is_available() else torch.device('cpu') +model.to(device) + +scaler = GradScaler(device) + +num_epochs = 12 + +for epoch in range(num_epochs): + train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10, scaler=scaler) + lr_scheduler.step() + +checkpoint_path = os.path.join(params['tmp_dir'], 'model.pth') +torch.save(model.state_dict(), checkpoint_path) + +output = { + 'checkpoint_path': checkpoint_path, + 'num_classes': num_classes, +} + +with open(params['output_path'], 'w') as f: + json.dump(output, f) diff --git a/src/resources/scripts/object-detection/faster_rcnn_r50_fpn_1x.py b/src/resources/scripts/object-detection/faster_rcnn_r50_fpn_1x.py deleted file mode 100644 index 78b8786..0000000 --- a/src/resources/scripts/object-detection/faster_rcnn_r50_fpn_1x.py +++ /dev/null @@ -1,257 +0,0 @@ -classes = ('interesting', ) - -model = dict( - type='FasterRCNN', - data_preprocessor=dict( - type='DetDataPreprocessor', - mean=[123.675, 116.28, 103.53], - std=[58.395, 57.12, 57.375], - bgr_to_rgb=True, - pad_size_divisor=32), - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - norm_eval=True, - style='pytorch', - init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_generator=dict( - type='AnchorGenerator', - scales=[8], - ratios=[0.5, 1.0, 2.0], - strides=[4, 8, 16, 32, 64]), - bbox_coder=dict( - type='DeltaXYWHBBoxCoder', - target_means=[0.0, 0.0, 0.0, 0.0], - target_stds=[1.0, 1.0, 1.0, 1.0]), - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='L1Loss', loss_weight=1.0)), - roi_head=dict( - type='StandardRoIHead', - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='Shared2FCBBoxHead', - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=1, - bbox_coder=dict( - type='DeltaXYWHBBoxCoder', - target_means=[0.0, 0.0, 0.0, 0.0], - target_stds=[0.1, 0.1, 0.2, 0.2]), - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='L1Loss', loss_weight=1.0))), - train_cfg=dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - match_low_quality=True, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=-1, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_pre=2000, - max_per_img=1000, - nms=dict(type='nms', iou_threshold=0.7), - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - match_low_quality=False, - 
ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)), - test_cfg=dict( - rpn=dict( - nms_pre=1000, - max_per_img=1000, - # The original config had iou_threshold=0.7. - # Lowered, because of many overlapping boxes for the same objects in tests. - nms=dict(type='nms', iou_threshold=0.2), - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - # The original config had iou_threshold=0.5. - # Lowered, because of many overlapping boxes for the same objects in tests. - nms=dict(type='nms', iou_threshold=0.2), - max_per_img=100))) - -dataset_type = 'CocoDataset' - -train_pipeline = [ - # Use color_type color_ignore_orientation to ignore EXIF orientation! - # See: https://github.com/open-mmlab/mmcv/blob/0b005c52b4571f7cd1a7a882a5acecef6357ef0f/mmcv/image/io.py#L145 - dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='RandomCrop', crop_size=(512, 512)), - # Example: https://github.com/open-mmlab/mmdetection/blob/master/configs/albu_example/mask_rcnn_r50_fpn_albu_1x_coco.py#L44 - dict( - type='Albu', - skip_img_without_anno=True, - bbox_params=dict( - type='BboxParams', - format='pascal_voc', - label_fields=['gt_bboxes_labels', 'gt_ignore_flags'], - filter_lost_elements=True, - min_area=100), - keymap=dict(img='image', gt_masks='masks', gt_bboxes='bboxes'), - transforms=[ - dict( - type='SomeOf', - # Choose each element with equal probability. - n=4, - p=0.25, - replace=False, - transforms=[ - dict(type='Flip'), - dict(type='RandomRotate90'), - dict(type='GaussianBlur', sigma_limit=[1.0, 2.0]), - dict(type='ImageCompression', quality_lower=25, quality_upper=50), - ]) - ]), - dict(type='PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape')) -] - -test_pipeline = [ - # Use color_type color_ignore_orientation to ignore EXIF orientation! - # See: https://github.com/open-mmlab/mmcv/blob/0b005c52b4571f7cd1a7a882a5acecef6357ef0f/mmcv/image/io.py#L145 - dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), - # Required to add the scale_factor to the meta keys. Some action needs this value in the pipeline. 
- dict(type='Resize', scale_factor=1.0), - dict(type='PackDetInputs', meta_keys=('img_id', 'ori_shape', 'img_shape', 'scale_factor')) -] - -train_dataloader = dict( - batch_size=16, - num_workers=2, - persistent_workers=True, # Avoid recreating subprocesses after each iteration - sampler=dict(type='DefaultSampler', shuffle=True), # Default sampler, supports both distributed and non-distributed training - batch_sampler=dict(type='AspectRatioBatchSampler'), # Default batch_sampler, used to ensure that images in the batch have similar aspect ratios, so as to better utilize graphics memory - dataset=dict( - type=dataset_type, - metainfo=dict(classes=classes), - ann_file='', - data_prefix=dict(img=''), - filter_cfg=dict(filter_empty_gt=True, min_size=16), - pipeline=train_pipeline)) - -val_dataloader = None -test_dataloader = dict( - batch_size=1, - num_workers=2, - persistent_workers=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict( - type=dataset_type, - metainfo=dict(classes=classes), - ann_file='', - data_prefix=dict(img=''), - test_mode=True, - pipeline=test_pipeline)) - -val_evaluator = None -test_evaluator = dict( - type='CocoMetric', - ann_file='', - metric=['bbox'], - format_only=False) - -optim_wrapper = dict( - type='OptimWrapper', - optimizer=dict( - type='SGD', - lr=0.02, - momentum=0.9, - weight_decay=0.0001), - clip_grad=None) - -epochs = 12 - -param_scheduler = [ - dict( - type='LinearLR', # Use linear learning rate warmup - start_factor=0.001, - by_epoch=False, - begin=0, - end=500), - dict( - type='MultiStepLR', # Use multi-step learning rate strategy during training - by_epoch=True, - begin=0, - end=epochs, - milestones=[8, 11]) -] - -train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=epochs, val_interval=1) -val_cfg = None -test_cfg = dict(type='TestLoop') - -default_hooks = dict( - checkpoint=dict(type='CheckpointHook', interval=epochs), - logger=dict(type='LoggerHook', interval=1)) - -custom_hooks = [dict(type='NumClassCheckHook')] - -dist_params = dict(backend='nccl') - -log_level = 'INFO' - -load_from = '' - -resume = None - -workflow = [('train', 1)] - -env_cfg = dict( - mp_cfg=dict(mp_start_method='fork', - opencv_num_threads=0)) - -# Don't change the base_batch_size. -# See: https://mmdetection.readthedocs.io/en/dev/1_exist_data_model.html#learning-rate-automatically-scale -auto_scale_lr = dict(enable=True, base_batch_size=16) - -work_dir = '' - -auto_resume = False - -# See: https://github.com/open-mmlab/mmdetection/issues/10052#issuecomment-1607320127 -default_scope = 'mmdet' diff --git a/src/resources/scripts/object-detection/torch_utils.py b/src/resources/scripts/object-detection/torch_utils.py new file mode 100644 index 0000000..a3b6611 --- /dev/null +++ b/src/resources/scripts/object-detection/torch_utils.py @@ -0,0 +1,294 @@ +import datetime +import time +from collections import defaultdict, deque +import math +import sys # Used by train_one_epoch() to abort on a non-finite loss. + +import torch +import torch.distributed as dist +from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2 +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor + +# Based on https://github.com/pytorch/vision/blob/main/references/detection/utils.py +# and https://github.com/pytorch/vision/blob/main/references/detection/engine.py + +""" +BSD 3-Clause License + +Copyright (c) Soumith Chintala 2016, +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +def get_model(num_classes, **kwargs): + model = fasterrcnn_resnet50_fpn_v2(weights='DEFAULT', **kwargs) + # Replace the classifier with a new one having the user-defined number of classes. + num_classes = num_classes + 1 # +1 for background + in_features = model.roi_heads.box_predictor.cls_score.in_features + model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) + + return model + +def collate_fn(batch): + return tuple(zip(*batch)) + + +def get_world_size(): + if not is_dist_avail_and_initialized(): + return 1 + return dist.get_world_size() + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True + +def reduce_dict(input_dict, average=True): + """ + Args: + input_dict (dict): all the values will be reduced + average (bool): whether to do average or sum + Reduce the values in the dictionary from all processes so that all processes + have the averaged results. Returns a dict with the same fields as + input_dict, after reduction. + """ + world_size = get_world_size() + if world_size < 2: + return input_dict + with torch.inference_mode(): + names = [] + values = [] + # sort the keys so that they are consistent across processes + for k in sorted(input_dict.keys()): + names.append(k) + values.append(input_dict[k]) + values = torch.stack(values, dim=0) + dist.all_reduce(values) + if average: + values /= world_size + reduced_dict = {k: v for k, v in zip(names, values)} + return reduced_dict + + +class SmoothedValue: + """Track a series of values and provide access to smoothed values over a + window or the global series average. + """ + + def __init__(self, window_size=20, fmt=None): + if fmt is None: + fmt = "{median:.4f} ({global_avg:.4f})" + self.deque = deque(maxlen=window_size) + self.total = 0.0 + self.count = 0 + self.fmt = fmt + + def update(self, value, n=1): + self.deque.append(value) + self.count += n + self.total += value * n + + def synchronize_between_processes(self): + """ + Warning: does not synchronize the deque! 
+ """ + if not is_dist_avail_and_initialized(): + return + t = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda") + dist.barrier() + dist.all_reduce(t) + t = t.tolist() + self.count = int(t[0]) + self.total = t[1] + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque), dtype=torch.float32) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + @property + def max(self): + return max(self.deque) + + @property + def value(self): + return self.deque[-1] + + def __str__(self): + return self.fmt.format( + median=self.median, avg=self.avg, global_avg=self.global_avg, max=self.max, value=self.value + ) + +class MetricLogger: + def __init__(self, delimiter="\t"): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + assert isinstance(v, (float, int)) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'") + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append(f"{name}: {str(meter)}") + return self.delimiter.join(loss_str) + + def synchronize_between_processes(self): + for meter in self.meters.values(): + meter.synchronize_between_processes() + + def add_meter(self, name, meter): + self.meters[name] = meter + + def log_every(self, iterable, print_freq, header=None): + i = 0 + if not header: + header = "" + start_time = time.time() + end = time.time() + iter_time = SmoothedValue(fmt="{avg:.4f}") + data_time = SmoothedValue(fmt="{avg:.4f}") + space_fmt = ":" + str(len(str(len(iterable)))) + "d" + if torch.cuda.is_available(): + log_msg = self.delimiter.join( + [ + header, + "[{0" + space_fmt + "}/{1}]", + "eta: {eta}", + "{meters}", + "time: {time}", + "data: {data}", + "max mem: {memory:.0f}", + ] + ) + else: + log_msg = self.delimiter.join( + [header, "[{0" + space_fmt + "}/{1}]", "eta: {eta}", "{meters}", "time: {time}", "data: {data}"] + ) + MB = 1024.0 * 1024.0 + for obj in iterable: + data_time.update(time.time() - end) + yield obj + iter_time.update(time.time() - end) + if i % print_freq == 0 or i == len(iterable) - 1: + eta_seconds = iter_time.global_avg * (len(iterable) - i) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + if torch.cuda.is_available(): + print( + log_msg.format( + i, + len(iterable), + eta=eta_string, + meters=str(self), + time=str(iter_time), + data=str(data_time), + memory=torch.cuda.max_memory_allocated() / MB, + ) + ) + else: + print( + log_msg.format( + i, len(iterable), eta=eta_string, meters=str(self), time=str(iter_time), data=str(data_time) + ) + ) + i += 1 + end = time.time() + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print(f"{header} Total time: {total_time_str} ({total_time / len(iterable):.4f} s / it)") + +def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq, scaler=None): + model.train() + metric_logger = MetricLogger(delimiter=" ") + metric_logger.add_meter("lr", SmoothedValue(window_size=1, fmt="{value:.6f}")) + header = f"Epoch: [{epoch}]" + + lr_scheduler = None + if epoch == 0: + warmup_factor = 1.0 / 1000 + 
warmup_iters = min(1000, len(data_loader) - 1) + + lr_scheduler = torch.optim.lr_scheduler.LinearLR( + optimizer, start_factor=warmup_factor, total_iters=warmup_iters + ) + + for images, targets in metric_logger.log_every(data_loader, print_freq, header): + images = list(image.to(device) for image in images) + targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()} for t in targets] + with torch.amp.autocast(str(device), enabled=scaler is not None): + loss_dict = model(images, targets) + losses = sum(loss for loss in loss_dict.values()) + + # reduce losses over all GPUs for logging purposes + loss_dict_reduced = reduce_dict(loss_dict) + losses_reduced = sum(loss for loss in loss_dict_reduced.values()) + + loss_value = losses_reduced.item() + + if not math.isfinite(loss_value): + print(f"Loss is {loss_value}, stopping training") + print(loss_dict_reduced) + sys.exit(1) + + optimizer.zero_grad() + if scaler is not None: + scaler.scale(losses).backward() + scaler.step(optimizer) + scaler.update() + else: + losses.backward() + optimizer.step() + + if lr_scheduler is not None: + lr_scheduler.step() + + metric_logger.update(loss=losses_reduced, **loss_dict_reduced) + metric_logger.update(lr=optimizer.param_groups[0]["lr"]) + + return metric_logger diff --git a/tests/Jobs/NoveltyDetectionTest.php b/tests/Jobs/NoveltyDetectionTest.php index 1539f92..9858ead 100644 --- a/tests/Jobs/NoveltyDetectionTest.php +++ b/tests/Jobs/NoveltyDetectionTest.php @@ -173,7 +173,7 @@ class NdJobStub extends NoveltyDetection public $crash = false; public $annotations = "[[100,200,20,0.9]]"; - protected function python($command, $log = 'log.txt') + protected function python($command, $log = 'log.txt', $env = '') { $this->command = $command; $imageId = $this->job->volume->images()->first()->id; diff --git a/tests/Jobs/ObjectDetectionTest.php b/tests/Jobs/ObjectDetectionTest.php index 40ada15..fea9865 100644 --- a/tests/Jobs/ObjectDetectionTest.php +++ b/tests/Jobs/ObjectDetectionTest.php @@ -28,7 +28,7 @@ public function testHandle() { FileCache::fake(); config([ - 'maia.mmdet_train_batch_size' => 12, + 'maia.train_batch_size' => 12, 'maia.max_workers' => 2, ]); @@ -71,10 +71,7 @@ public function testHandle() 'tmp_dir' => $tmpDir, 'max_workers' => 2, 'output_path' => "{$tmpDir}/output-training.json", - 'base_config' => config('maia.mmdet_base_config'), 'batch_size' => 12, - 'backbone_model_path' => config('maia.backbone_model_path'), - 'model_path' => config('maia.model_path'), ]; $expectInferenceJson = [ @@ -109,7 +106,7 @@ public function testHandle() $this->assertArrayHasKey($image2->id, $inputJson['images']); unset($inputJson['images']); $this->assertSame($expectInferenceJson, $inputJson); - $this->assertStringContainsString("InferenceRunner.py {$inferenceInputJsonPath} {$datasetOutputJsonPath} {$trainingOutputJsonPath}", $request->commands[2]); + $this->assertStringContainsString("InferenceRunner.py {$inferenceInputJsonPath} {$trainingOutputJsonPath}", $request->commands[2]); $this->assertSame(State::annotationCandidatesId(), $job->fresh()->state_id); @@ -184,7 +181,7 @@ public function testHandleKnowledgeTransfer() { FileCache::fake(); config([ - 'maia.mmdet_train_batch_size' => 12, + 'maia.train_batch_size' => 12, 'maia.max_workers' => 2, ]); @@ -240,10 +237,7 @@ public function testHandleKnowledgeTransfer() 'tmp_dir' => $tmpDir, 'max_workers' => 2, 'output_path' => "{$tmpDir}/output-training.json", - 'base_config' => config('maia.mmdet_base_config'), 'batch_size' => 12, - 
'backbone_model_path' => config('maia.backbone_model_path'), - 'model_path' => config('maia.model_path'), ]; $expectInferenceJson = [ @@ -277,7 +271,7 @@ public function testHandleKnowledgeTransfer() $this->assertArrayHasKey($ownImage->id, $inputJson['images']); unset($inputJson['images']); $this->assertSame($expectInferenceJson, $inputJson); - $this->assertStringContainsString("InferenceRunner.py {$inferenceInputJsonPath} {$datasetOutputJsonPath} {$trainingOutputJsonPath}", $request->commands[2]); + $this->assertStringContainsString("InferenceRunner.py {$inferenceInputJsonPath} {$trainingOutputJsonPath}", $request->commands[2]); $annotations = $job->annotationCandidates()->get(); $this->assertSame(1, $annotations->count()); @@ -295,7 +289,7 @@ public function testHandleAreaKnowledgeTransfer() { FileCache::fake(); config([ - 'maia.mmdet_train_batch_size' => 12, + 'maia.train_batch_size' => 12, 'maia.max_workers' => 2, ]); @@ -367,7 +361,7 @@ public function testHandleRollbackOnError() { FileCache::fake(); config([ - 'maia.mmdet_train_batch_size' => 12, + 'maia.train_batch_size' => 12, 'maia.max_workers' => 2, ]); @@ -443,7 +437,7 @@ protected function maybeDownloadWeights($from, $to) // do nothing } - protected function python($command, $log = 'log.txt') + protected function python($command, $log = 'log.txt', $env = '') { array_push($this->commands, $command);
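
Note on pretrained weights: get_model() in torch_utils.py calls fasterrcnn_resnet50_fpn_v2(weights='DEFAULT'), so torch.hub downloads and caches the pretrained weights under $TORCH_HOME/hub/checkpoints on first use. This is why ObjectDetection.php now exports TORCH_HOME for the training and inference scripts instead of calling the removed maybeDownloadWeights() helper.

The following is a minimal sketch (not part of the patch) of loading a checkpoint written by TrainingRunner.py and running a single image, mirroring what InferenceRunner.py does. It assumes the single 'interesting' class produced by DatasetGenerator.py, that it is run from the object-detection scripts directory so torch_utils is importable, and that 'model.pth', 'example.jpg' and '/tmp/maia' are placeholder paths.

import os
os.environ.setdefault('TORCH_HOME', '/tmp/maia')  # Placeholder cache dir; set before pretrained weights are requested.

import numpy as np
import torch
import albumentations as A
from albumentations.pytorch import ToTensorV2
from PIL import Image
from torch_utils import get_model

# One foreground class; get_model() adds the background class itself.
model = get_model(1, min_size=512)
model.load_state_dict(torch.load('model.pth', map_location='cpu'))
model.eval()

# Same preprocessing as InferenceRunner.py.
transforms = A.Compose([A.ToFloat(), ToTensorV2()])
image = transforms(image=np.array(Image.open('example.jpg').convert('RGB')))['image']

with torch.inference_mode():
    pred = model([image])[0]

print(pred['boxes'], pred['scores'], pred['labels'])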