10 changes: 3 additions & 7 deletions requirements.txt
@@ -1,10 +1,6 @@
-torch==2.6.*
-torchvision==0.21.*
-mmcv==2.1.*
-# Update CUDA Version if necessary.
-# See: https://mmcv.readthedocs.io/en/latest/get_started/installation.html#install-with-pip
--f https://download.openmmlab.com/mmcv/dist/cu117/torch2.0/index.html
-mmdet==3.2.*
+torch==2.9.*
+torchvision==0.24.*
+pycocotools
 albumentations
 scikit-learn
 scikit-image
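torchvision releases are built against a specific torch release (0.24.x pairs with 2.9.x), so the two pins above must move together. A quick sanity check after installation (an illustrative sketch, not part of this PR):

    import torch
    import torchvision

    # A mismatched pair typically fails at import or at model-build time,
    # so checking the versions up front gives a clearer error.
    print(torch.__version__)        # expected: 2.9.*
    print(torchvision.__version__)  # expected: 0.24.*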
4 changes: 2 additions & 2 deletions src/Jobs/DetectionJob.php
@@ -129,13 +129,13 @@ protected function getTmpDirPath()
      *
      * @return string
      */
-    protected function python($command, $log = 'log.txt')
+    protected function python($command, $log = 'log.txt', $env = '')
     {
         $code = 0;
         $lines = [];
         $python = config('maia.python');
         $logFile = "{$this->tmpDir}/{$log}";
-        exec("{$python} -u {$command} >{$logFile} 2>&1", $lines, $code);
+        exec("{$env} {$python} -u {$command} >{$logFile} 2>&1", $lines, $code);
 
         if ($code !== 0) {
             $lines = File::get($logFile);
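The new optional $env argument is prepended verbatim to the shell command, so the variable assignment applies only to that one child process. In Python terms (an illustrative sketch with a hypothetical path, not part of the PR):

    import subprocess

    # A VAR=value prefix is applied by the shell only to the command that
    # follows it, so TORCH_HOME is set for the script and nothing else.
    subprocess.run('TORCH_HOME=/tmp/maia python -u TrainingRunner.py input.json', shell=True)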
40 changes: 10 additions & 30 deletions src/Jobs/ObjectDetection.php
@@ -156,7 +156,7 @@ protected function generateDataset($images, $paths)
 
         $imagesMap = $this->buildImagesMap($images, $paths);
         $inputPath = $this->createDatasetJson($imagesMap, $outputPath);
-        $script = config('maia.mmdet_dataset_script');
+        $script = config('maia.dataset_script');
         $this->python("{$script} {$inputPath}", 'dataset-log.txt');
 
         return $outputPath;
@@ -200,34 +200,15 @@ protected function createDatasetJson($imagesMap, $outputJsonPath)
     protected function performTraining($datasetOutputPath)
     {
         $outputPath = "{$this->tmpDir}/output-training.json";
-        $this->maybeDownloadWeights(config('maia.backbone_model_url'), config('maia.backbone_model_path'));
-        $this->maybeDownloadWeights(config('maia.model_url'), config('maia.model_path'));
         $inputPath = $this->createTrainingJson($outputPath);
-        $script = config('maia.mmdet_training_script');
-        $this->python("{$script} {$inputPath} {$datasetOutputPath}", 'training-log.txt');
+        $script = config('maia.training_script');
+        $tmpDir = config('maia.tmp_dir');
+        $env = "TORCH_HOME=$tmpDir";
+        $this->python("{$script} {$inputPath} {$datasetOutputPath}", 'training-log.txt', $env);
 
         return $outputPath;
     }
 
-    /**
-     * Downloads the model pretrained weights if they weren't downloaded yet.
-     *
-     * @param string $from
-     * @param string $to
-     *
-     */
-    protected function maybeDownloadWeights($from, $to)
-    {
-        if (!File::exists($to)) {
-            $this->ensureDirectory(dirname($to));
-            $success = @copy($from, $to);
-
-            if (!$success) {
-                throw new Exception("Failed to download model weights from '{$from}'.");
-            }
-        }
-    }
-
     /**
      * Create the JSON file that is the input to the training script.
      *
@@ -242,10 +223,7 @@ protected function createTrainingJson($outputJsonPath)
             'tmp_dir' => $this->tmpDir,
             'max_workers' => intval(config('maia.max_workers')),
             'output_path' => $outputJsonPath,
-            'base_config' => config('maia.mmdet_base_config'),
-            'batch_size' => config('maia.mmdet_train_batch_size'),
-            'backbone_model_path' => config('maia.backbone_model_path'),
-            'model_path' => config('maia.model_path'),
+            'batch_size' => config('maia.train_batch_size'),
         ];
 
         File::put($path, json_encode($content, JSON_UNESCAPED_SLASHES));
@@ -265,8 +243,10 @@ protected function performInference($images, $datasetOutputPath, $trainingOutput
         FileCache::batch($images, function ($images, $paths) use ($datasetOutputPath, $trainingOutputPath) {
             $imagesMap = $this->buildImagesMap($images, $paths);
             $inputPath = $this->createInferenceJson($imagesMap);
-            $script = config('maia.mmdet_inference_script');
-            $this->python("{$script} {$inputPath} {$datasetOutputPath} {$trainingOutputPath}", 'inference-log.txt');
+            $script = config('maia.inference_script');
+            $tmpDir = config('maia.tmp_dir');
+            $env = "TORCH_HOME=$tmpDir";
+            $this->python("{$script} {$inputPath} {$trainingOutputPath}", 'inference-log.txt', $env);
         });
     }
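Setting TORCH_HOME here is what makes the removed maybeDownloadWeights() step unnecessary: torchvision downloads pretrained weights on first use and caches them under $TORCH_HOME/hub/checkpoints (default ~/.cache/torch). A minimal sketch of that behavior, assuming the torchvision Faster R-CNN builder that replaces the MMDetection setup (path illustrative):

    import os

    # Point the cache at the job's tmp dir, as the PHP job now does via $env.
    os.environ['TORCH_HOME'] = '/tmp/maia'

    from torchvision.models.detection import (
        FasterRCNN_ResNet50_FPN_Weights,
        fasterrcnn_resnet50_fpn,
    )

    # The first call downloads the checkpoint into the cache; later calls reuse it.
    model = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)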
35 changes: 5 additions & 30 deletions src/config/maia.php
@@ -72,50 +72,25 @@
     /*
     | Path to the script that generates the training dataset for MMDetection.
     */
-    'mmdet_dataset_script' => __DIR__.'/../resources/scripts/object-detection/DatasetGenerator.py',
+    'dataset_script' => __DIR__.'/../resources/scripts/object-detection/DatasetGenerator.py',
 
     /*
     | Path to the script that trains the MMDetection model.
     */
-    'mmdet_training_script' => __DIR__.'/../resources/scripts/object-detection/TrainingRunner.py',
+    'training_script' => __DIR__.'/../resources/scripts/object-detection/TrainingRunner.py',
 
     /*
     | Path to the script that performs inference with the trained MMDetection model.
     */
-    'mmdet_inference_script' => __DIR__.'/../resources/scripts/object-detection/InferenceRunner.py',
+    'inference_script' => __DIR__.'/../resources/scripts/object-detection/InferenceRunner.py',
 
-    /*
-    | Path to the MMDetection base config file.
-    */
-    'mmdet_base_config' => __DIR__.'/../resources/scripts/object-detection/faster_rcnn_r50_fpn_1x.py',
-
-    /*
-    | URL from which to download the pretrained weights for the model backbone.
-    */
-    'backbone_model_url' => env('MAIA_BACKBONE_MODEL_URL', 'https://download.pytorch.org/models/resnet50-11ad3fa6.pth'),
-
-    /*
-    | URL from which to download the trained weights for the model.
-    */
-    'model_url' => env('MAIA_MODEL_URL', 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'),
-
-    /*
-    | Path to the file to store the pretrained backbone weights to.
-    */
-    'backbone_model_path' => storage_path('maia_jobs').'/resnet50-11ad3fa6.pth',
-
-    /*
-    | Path to the file to store the pretrained model weights to.
-    */
-    'model_path' => storage_path('maia_jobs').'/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth',
-
     /*
-    | Number of 512x512 px images in a training batch of MMDetection.
+    | Number of 512x512 px images in one training batch.
     | This can be increased with larger GPU memory to achieve faster training.
     |
     | Default is 16.
     */
-    'mmdet_train_batch_size' => env('MAIA_MMDET_TRAIN_BATCH_SIZE', 16),
+    'train_batch_size' => env('MAIA_TRAIN_BATCH_SIZE', 16),
 
 
     'notifications' => [
12 changes: 10 additions & 2 deletions src/resources/scripts/object-detection/DatasetGenerator.py
@@ -64,7 +64,7 @@ def generate(self):
             'images': image_list,
             'annotations': annotation_list,
             'categories': [{
-                'id': 0,
+                'id': 1,
                 'name': 'interesting',
                 'supercategory': 'interesting',
             }],
@@ -84,6 +84,14 @@ def process_image(self, imageId, proposals):
         scale_factor = self.scale_factors[imageId]
         width = int(round(image.width * scale_factor))
         height = int(round(image.height * scale_factor))
+        # Make sure the scaling does not increase the image size too much
+        # (e.g. if there is an error in the metadata). Use a maximum of
+        # 15,000x15,000 pixels.
+        too_large_factor = min(15000 / width, 15000 / height)
+        if too_large_factor < 1.0:
+            width = int(round(width * too_large_factor))
+            height = int(round(height * too_large_factor))
+            scale_factor = scale_factor * too_large_factor
         image_format = image.format
         image = image.resize((width, height))
         proposals = np.round(np.array(proposals, dtype=np.float32) * scale_factor)
@@ -102,7 +110,7 @@ def process_image(self, imageId, proposals):
             annotations.append({
                 'id': 0, # Placeholder, will be updated to a unique ID later.
                 'image_id': int(imageId),
-                'category_id': 0, # There is only one category.
+                'category_id': 1, # There is only one category.
                 'bbox': [
                     int(p[0] - p[2]), # px
                     int(p[1] - p[2]), # py
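The category and annotation IDs move from 0 to 1 because torchvision's detection models treat label 0 as the implicit background class, so real COCO category IDs must start at 1. A minimal sketch of the resulting structure (IDs illustrative):

    # Label 0 is reserved for background; the single real category gets id 1.
    coco_skeleton = {
        'categories': [
            {'id': 1, 'name': 'interesting', 'supercategory': 'interesting'},
        ],
        'annotations': [
            # bbox follows the COCO [x, y, width, height] convention.
            {'id': 1, 'image_id': 123, 'category_id': 1, 'bbox': [10, 20, 50, 50]},
        ],
    }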
83 changes: 44 additions & 39 deletions src/resources/scripts/object-detection/InferenceRunner.py
@@ -1,65 +1,73 @@
 import sys
 import os
 import json
-from torch import cuda
-from torch.serialization import safe_globals
 from concurrent.futures import ThreadPoolExecutor
-from mmdet.apis import init_detector, inference_detector
-from mmengine.logging.history_buffer import HistoryBuffer
-from numpy.core.multiarray import _reconstruct
-from numpy import ndarray
-from numpy import dtype
-from numpy.core.multiarray import scalar
+
+import torch
+from PIL import Image
+import albumentations as A
+import numpy as np
+from torch_utils import get_model
+from albumentations.pytorch import ToTensorV2
 
 class InferenceRunner(object):
 
-    def __init__(self, params, trainset):
+    def __init__(self, params):
         # Path to the directory to store temporary files.
         self.tmp_dir = params['tmp_dir']
         # Path to the trained model to use for inference.
         self.checkpoint_path = params['checkpoint_path']
-        # Path to the MMDetection config.
-        self.config_path = params['config_path']
         # We need at least one worker thread here.
         self.max_workers = max(int(params['max_workers']), 1)
 
-        self.cfg_options = {
-            'gpu_ids': [0],
-        }
+        self.num_classes = params['num_classes']
 
         self.images = {k: v for k, v in params['images'].items()}
 
+        self.transforms = A.Compose([
+            A.ToFloat(),
+            ToTensorV2(),
+        ])
+
     def run(self):
-        device = 'cuda:0' if cuda.is_available() else 'cpu'
-        custom_safe_globals = [
-            HistoryBuffer,
-            _reconstruct,
-            ndarray,
-            dtype,
-            type(dtype('float64')),
-            type(dtype('int64')),
-            getattr,
-            scalar,
-        ]
-
-        with safe_globals(custom_safe_globals):
-            model = init_detector(self.config_path, checkpoint=self.checkpoint_path, device=device, cfg_options=self.cfg_options)
+        model = get_model(
+            self.num_classes,
+            # The original config had rpn_nms_thresh=0.7 and box_nms_thresh=0.5.
+            # Lowered, because of many overlapping boxes for the same objects in tests.
+            rpn_nms_thresh=0.2,
+            box_nms_thresh=0.2,
+            # Increase default max_size of 1333.
+            max_size=8192,
+            # Use same min_size enforced by BIIGLE.
+            min_size=512,
+        )
+        model.load_state_dict(torch.load(self.checkpoint_path))
+
+        model.eval()
+
+        device = torch.accelerator.current_accelerator() if torch.accelerator.is_available() else torch.device('cpu')
+        model.to(device)
 
         executor = ThreadPoolExecutor(max_workers=self.max_workers)
 
         total_images = len(self.images)
-        for index, (image_id, image_path) in enumerate(self.images.items()):
-            print('Image {} of {} (#{})'.format(index + 1, total_images, image_id))
-            result = inference_detector(model, image_path)
-            executor.submit(self.process_result, image_id, result.pred_instances)
+        with torch.inference_mode():
+            for index, (image_id, image_path) in enumerate(self.images.items()):
+                print('Image {} of {} (#{})'.format(index + 1, total_images, image_id))
+                image = np.array(Image.open(image_path).convert('RGB'))
+                image = self.transforms(image=image)['image'].to(device)
+                result = model([image])[0]
+
+                executor.submit(self.process_result, image_id, result)
 
         # Wait for pending jobs of the postprocessing.
         executor.shutdown(True)
 
     def process_result(self, image_id, pred):
         points = []
-        for bbox, score, label in zip(pred.bboxes, pred.scores, pred.labels):
-            x1, y1, x2, y2 = bbox.detach().cpu().numpy()
+        boxes = pred['boxes'].detach().cpu().numpy()
+        for bbox, score, label in zip(boxes, pred['scores'], pred['labels']):
+            x1, y1, x2, y2 = bbox
             r = round(max(x2 - x1, y2 - y1) / 2, 2)
             x = round((x1 + x2) / 2, 2)
             y = round((y1 + y2) / 2, 2)
@@ -74,13 +82,10 @@ def process_result(self, image_id, pred):
     params = json.load(f)
 
 with open(sys.argv[2]) as f:
-    trainset = json.load(f)
-
-with open(sys.argv[3]) as f:
     train_params = json.load(f)
 
 params['checkpoint_path'] = train_params['checkpoint_path']
-params['config_path'] = train_params['config_path']
+params['num_classes'] = train_params['num_classes']
 
-runner = InferenceRunner(params, trainset)
+runner = InferenceRunner(params)
 runner.run()
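The torch_utils module that provides get_model() is not shown in this diff. Judging from the keyword arguments, it presumably wraps torchvision's Faster R-CNN builder; a hedged sketch under that assumption (the COCO-pretrained weights and the box-predictor swap are guesses, not confirmed by the PR):

    # torch_utils.py -- hypothetical sketch, not part of this diff.
    import torchvision
    from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

    def get_model(num_classes, **kwargs):
        # Extra kwargs (rpn_nms_thresh, box_nms_thresh, min_size, max_size)
        # are forwarded by torchvision to the FasterRCNN constructor.
        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
            weights=torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.DEFAULT,
            **kwargs,
        )
        # Swap the box predictor so it outputs num_classes scores
        # (torchvision counts background as class 0).
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
        return model

The A.ToFloat() plus ToTensorV2() pipeline in InferenceRunner matches what such a model expects in eval mode: a float CHW tensor with values in [0, 1], with normalization handled inside the model.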