
Commit 42aa5c8

Merge branch 'develop'

2 parents: 4d085bc + f9a3c97

File tree: 9 files changed, +162 −16 lines

.gitignore

Lines changed: 2 additions & 1 deletion

@@ -14,4 +14,5 @@ dist

 test
 build
-yolov8*
+yolov8*
+pyrightconfig.json

CHANGELOG.md

Lines changed: 5 additions & 0 deletions

@@ -1,3 +1,8 @@
+## [1.2.3] - 2024-03-17
+### Added
+- Added a tutorial on how to run the YOLOv8 pretrained object detection model: `Tutorials/11_Yolov8/README.md`
+
+
 ## [1.2.2] - 2024-03-15
 ### Changed
 - Bug fixed with `loss_info` local variable in `mltu.torch.model.Model` object

README.md

Lines changed: 3 additions & 1 deletion

@@ -24,4 +24,6 @@ Each tutorial has its own requirements.txt file for a specific mltu version. As
 6. [Introduction to PyTorch in a practical way](https://pylessons.com/pytorch-introduction), code in ```Tutorials\06_pytorch_introduction``` folder;
 7. [Using custom wrapper to simplify PyTorch models training pipeline](https://pylessons.com/pytorch-introduction), code in ```Tutorials\07_pytorch_wrapper``` folder;
 8. [Handwriting words recognition with PyTorch](https://pylessons.com/handwriting-recognition-pytorch), code in ```Tutorials\08_handwriting_recognition_torch``` folder;
-9. [Transformer training with TensorFlow for Translation task](https://pylessons.com/transformers-training), code in ```Tutorials\09_translation_transformer``` folder;
+9. [Transformer training with TensorFlow for Translation task](https://pylessons.com/transformers-training), code in ```Tutorials\09_translation_transformer``` folder;
+10. [Speech Recognition in Python | finetune wav2vec2 model for a custom ASR model](https://youtu.be/h6ooEGzjkj0), code in ```Tutorials\10_wav2vec2_torch``` folder;
+11. [YOLOv8: Real-Time Object Detection Simplified](https://youtu.be/vegL__weCxY), code in ```Tutorials\11_Yolov8``` folder;

Tutorials/11_Yolov8/README.md

Lines changed: 139 additions & 0 deletions (new file)

# Run the Ultralytics YOLOv8 pretrained model

YouTube tutorial link: [YOLOv8: Real-Time Object Detection Simplified](https://youtu.be/vegL__weCxY)

First, I recommend installing the required packages in a virtual environment. These are the pinned versions from this tutorial's `requirements.txt` (install with `pip install -r requirements.txt`):
```bash
mltu==1.2.3
ultralytics==8.1.28
torch==2.0.0
torchvision==0.15.1
onnxruntime==1.15.1
onnx==1.12.0
```

## Run the pretrained Ultralytics YOLOv8 in a torch environment on a webcam:
```python
import cv2
from ultralytics.engine.model import Model as BaseModel
from mltu.torch.yolo.detectors.torch_detector import Detector as TorchDetector

input_width, input_height = 640, 640
confidence_threshold = 0.5
iou_threshold = 0.5

base_model = BaseModel("yolov8m.pt")
detector = TorchDetector(base_model.model, input_width, input_height, base_model.names, confidence_threshold, iou_threshold)

cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Perform YOLO object detection
    detections = detector(frame)

    # Apply the detections to the frame
    frame = detections.applyToFrame(frame)

    # Print the FPS
    print(detector.fps)

    # Display the output image
    cv2.imshow("Object Detection", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```
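
A side note on input sources: the detection loop above does not care where frames come from, so the webcam index can be swapped for a path to a recorded video. A minimal sketch (the file name is just a placeholder):
```python
import cv2

# Any video file OpenCV can decode works in place of the webcam index;
# the rest of the detection loop stays unchanged.
cap = cv2.VideoCapture("my_video.mp4")  # placeholder path
```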

## Convert the pretrained model to ONNX:
```python
import onnx
import torch
from ultralytics.engine.model import Model as BaseModel

base_model = BaseModel("yolov8m.pt")

classes = base_model.names
input_width, input_height = 640, 640
input_shape = (1, 3, input_height, input_width)  # NCHW
model = base_model.model

# Place the model on the CPU
model.to("cpu")

# Set the model to inference mode
model.eval()

# Dummy input used to trace the model during export
dummy_input = torch.randn(input_shape).to("cpu")

# Export the model to ONNX format
torch.onnx.export(
    model,
    dummy_input,
    "yolov8m.onnx",
    export_params=True,
    input_names=["input"],
    output_names=["output"],
    dynamic_axes={
        "input": {0: "batch_size", 2: "height", 3: "width"},
        "output": {0: "batch_size", 2: "anchors"},
    },
)

# Add the class names to the model as metadata
metadata = {"classes": classes}

# Load the ONNX model
onnx_model = onnx.load("yolov8m.onnx")

# Add the metadata dictionary to the ONNX model's metadata_props attribute
for key, value in metadata.items():
    meta = onnx_model.metadata_props.add()
    meta.key = key
    meta.value = str(value)

# Save the modified ONNX model
onnx.save(onnx_model, "yolov8m.onnx")
```
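
To check that the class names actually made it into the exported file, the metadata can be read back with ONNX Runtime. A minimal sketch, assuming the `yolov8m.onnx` produced above (this is not part of the tutorial code):
```python
import ast
import onnxruntime

# Load the exported model and read its custom metadata
session = onnxruntime.InferenceSession("yolov8m.onnx", providers=["CPUExecutionProvider"])
metadata = session.get_modelmeta().custom_metadata_map

# "classes" was stored via str(), so parse it back into a dict
classes = ast.literal_eval(metadata["classes"])
print(classes[0])  # "person" for a COCO-pretrained YOLOv8
```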

## Run the YOLOv8 ONNX model with ONNX Runtime:
```python
import cv2
from mltu.torch.yolo.detectors.onnx_detector import Detector as OnnxDetector

input_width, input_height = 640, 640
confidence_threshold = 0.5
iou_threshold = 0.5

detector = OnnxDetector("yolov8m.onnx", input_width, input_height, confidence_threshold, iou_threshold)

cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Perform YOLO object detection
    detections = detector(frame)

    # Apply the detections to the frame
    frame = detections.applyToFrame(frame)

    # Print the FPS
    print(detector.fps)

    # Display the output image
    cv2.imshow("Object Detection", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```
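
Worth noting: Ultralytics also ships a built-in exporter, so the manual `torch.onnx.export` route above is mainly about controlling the dynamic axes and embedding the `classes` metadata that `OnnxDetector` presumably relies on (it takes no class-name argument). The built-in one-liner, for comparison (its output may not carry the metadata in the form mltu expects):
```python
from ultralytics import YOLO

# Ultralytics' own ONNX exporter; writes yolov8m.onnx next to the weights file
model = YOLO("yolov8m.pt")
model.export(format="onnx")
```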

Tutorials/11_Yolov8/requirements.txt

Lines changed: 5 additions & 4 deletions

@@ -1,5 +1,6 @@
-ultralytics==8.1.9
-torch==2.1.1
+mltu==1.2.3
+ultralytics==8.1.28
+torch==2.0.0
 torchvision==0.15.1
-onnxruntime
-onnx
+onnxruntime==1.15.1
+onnx==1.12.0

Tutorials/11_Yolov8/run_pretrained.py

Lines changed: 4 additions & 6 deletions

@@ -3,15 +3,12 @@
 from mltu.torch.yolo.detectors.torch_detector import Detector as TorchDetector
 from mltu.torch.yolo.detectors.onnx_detector import Detector as OnnxDetector

-
-classes = {v: v for v in range(80)}
-input_width, input_height = 320, 320
-
+input_width, input_height = 640, 640
 confidence_threshold = 0.5
 iou_threshold = 0.5

 # base_model = BaseModel("yolov8m.pt")
-# detector = TorchDetector(base_model.model, input_width, input_height, classes, confidence_threshold, iou_threshold)
+# detector = TorchDetector(base_model.model, input_width, input_height, base_model.names, confidence_threshold, iou_threshold)
 detector = OnnxDetector("yolov8m.onnx", input_width, input_height, confidence_threshold, iou_threshold)

 cap = cv2.VideoCapture(0)

@@ -31,7 +28,8 @@

     # Display the output image
     cv2.imshow("Object Detection", frame)
-    cv2.waitKey(1)
+    if cv2.waitKey(1) & 0xFF == ord('q'):
+        break

 cap.release()
 cv2.destroyAllWindows()
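
The replaced `cv2.waitKey(1)` only pumped the GUI event loop; the new `cv2.waitKey(1) & 0xFF == ord('q')` masks the returned key code down to its low byte (on some platforms `waitKey` returns more than 8 bits) and lets the preview window be closed cleanly by pressing `q` instead of killing the process.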

mltu/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-__version__ = "1.2.2"
+__version__ = "1.2.3"

 from .annotations.images import Image
 from .annotations.images import CVImage

mltu/annotations/detections.py

Lines changed: 2 additions & 2 deletions

@@ -114,8 +114,8 @@ def validate(self):
         if self.width is None or self.height is None:
             raise ValueError("width and height must be provided when relative is False")

-        if (np.array(self.bbox) > 1.0).any():
-            raise ValueError("bbox coordinates must be in range [0, 1] when relative is False")
+        if not (np.array(self.bbox) > 1.0).any():
+            raise ValueError("bbox coordinates must be in range [0, np.inf] when relative is False")

         bbox = np.array(self.bbox) / np.array([self.width, self.height, self.width, self.height])
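
For context on what this `validate` change guards: with `relative=False` the bbox is expected in absolute pixel coordinates, which the line after the check then normalizes by the image size. A small illustrative sketch of that conversion (the helper name is ours, not part of the mltu API):
```python
import numpy as np

def to_relative(bbox, width, height):
    """Convert an absolute [x1, y1, x2, y2] pixel bbox into [0, 1] relative coordinates."""
    return np.array(bbox, dtype=float) / np.array([width, height, width, height])

# A 640x480 image with a box spanning x = 64..320, y = 32..240
print(to_relative([64, 32, 320, 240], width=640, height=480))
# -> [0.1        0.06666667 0.5        0.5       ]
```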

mltu/torch/yolo/detectors/onnx_detector.py

Lines changed: 1 addition & 1 deletion

@@ -49,7 +49,7 @@ def predict(self, image: np.ndarray, **kwargs) -> Detections:
         # Perform inference on the preprocessed image
         preds = self.model.run(self.output_names, {self.input_names[0]: preprocessed_image})

-        # Convert torch tensor to numpy array
+        # Extract the results from the predictions
        results = preds[0][0]

         # Calculate the scaling factors for the bounding box coordinates
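
For reference, `preds` here is the list returned by `InferenceSession.run`, so `preds[0][0]` drops the batch axis of the single output tensor. For a COCO-pretrained YOLOv8 at 640×640 that array is shaped (84, 8400): 4 box values plus 80 class scores per anchor. A hedged sketch of how such an array is typically unpacked (not mltu's actual implementation):
```python
import numpy as np

# Stand-in for preds[0][0]: (4 + num_classes, num_anchors)
results = np.random.rand(84, 8400).astype(np.float32)

predictions = results.T            # (num_anchors, 4 + num_classes)
boxes = predictions[:, :4]         # cx, cy, w, h per anchor
scores = predictions[:, 4:]        # per-class confidences
class_ids = scores.argmax(axis=1)  # best class per anchor
confidences = scores.max(axis=1)   # its confidence
```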
