Transparency in letterbox & overlay image; image list for icon annotator

roboflow · Aug 26, 2024 · b08e7f3 · b08e7f3
1 parent c8673fe
commit b08e7f3
Show file tree

Hide file tree

Showing 2 changed files with 80 additions and 54 deletions.
diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py
@@ -1,3 +1,4 @@
+from functools import lru_cache
 from math import sqrt
 from typing import List, Optional, Tuple, Union
 
@@ -22,7 +23,12 @@
     ensure_cv2_image_for_annotation,
     ensure_pil_image_for_annotation,
 )
-from supervision.utils.image import crop_image, overlay_image, scale_image
+from supervision.utils.image import (
+    crop_image,
+    letterbox_image,
+    overlay_image,
+    scale_image,
+)
 from supervision.utils.internal import deprecated
 
 
@@ -1399,52 +1405,36 @@ class IconAnnotator(BaseAnnotator):
 
     def __init__(
         self,
-        icon_path: str,
-        position: Position = Position.TOP_CENTER,
-        icon_scale: float = 0.2,
+        icon_resolution_wh: Tuple[int, int] = (64, 64),
+        icon_position: Position = Position.TOP_CENTER,
         offset_xy: Tuple[int, int] = (0, 0),
     ):
         """
         Args:
-            icon_path (str): path to the icon file, in png format.
-            position (Position): The position of the icon. Defaults to
-                `TOP_CENTER`.
-            icon_scale (float): Represents the fraction of the original icon size to
-              be displayed, with a default value of 0.2 (equivalent to 20% of the
-              original size).
+            icon_resolution_wh (Tuple[int, int]): The size of an icon in pixels.
+            icon_position (Position): The position of the icon.
             offset_xy (Tuple[int, int]): The offset to apply to the icon position,
                 in pixels. Can be both positive and negative.
         """
-        self.position = position
-        icon = cv2.imread(icon_path, cv2.IMREAD_UNCHANGED)
-        if icon is None:
-            raise FileNotFoundError(
-                f"Error: Couldn't load the icon image from {icon_path}"
-            )
-
-        resized_icon_h, resized_icon_w = (
-            int(icon.shape[0] * icon_scale),
-            int(icon.shape[1] * icon_scale),
-        )
-        self.icon = cv2.resize(
-            icon, (resized_icon_w, resized_icon_h), interpolation=cv2.INTER_AREA
-        )
+        self.icon_resolution_wh = icon_resolution_wh
+        self.position = icon_position
         self.offset_xy = offset_xy
 
     @ensure_cv2_image_for_annotation
     def annotate(
-        self,
-        scene: ImageType,
-        detections: Detections,
+        self, scene: ImageType, detections: Detections, icon_path: Union[str, List[str]]
     ) -> ImageType:
         """
-        Annotates the given scene with icons based on the provided detections.
+        Annotates the given scene with given icons.
 
         Args:
             scene (ImageType): The image where labels will be drawn.
                 `ImageType` is a flexible type, accepting either `numpy.ndarray`
                 or `PIL.Image.Image`.
             detections (Detections): Object detections to annotate.
+            icon_path (Union[str, List[str]]): The path to the PNG image to use as an
+                icon. Must be a single path or a list of paths, one for each detection.
+                Pass an empty string `""` to draw nothing.
 
         Returns:
             The annotated image, matching the type of `scene` (`numpy.ndarray`
@@ -1457,43 +1447,59 @@ def annotate(
             image = ...
             detections = sv.Detections(...)
 
-            icon_annotator = sv.IconAnnotator(icon_path='...')
+            icon_paths = []
+            for class_name in detections.data["class_name"]:
+                if class_name == "cat":
+                    icon_paths.append("cat.png")
+                elif class_name == "dog":
+                    icon_paths.append("dog.png")
+                else:
+                    icon_paths.append("")
+
+            icon_annotator = sv.IconAnnotator(icon_resolution_wh=(64, 64))
             annotated_frame = icon_annotator.annotate(
                 scene=image.copy(),
-                detections=detections
+                detections=detections,
+                icon_path=icon_paths
             )
             ```
 
         """
         assert isinstance(scene, np.ndarray)
-        icon_h, icon_w = self.icon.shape[:2]
-        print(self.icon.shape)
-
-        padded_scene = np.pad(
-            scene,
-            ((icon_h, icon_h), (icon_w, icon_w), (0, 0)),
-            mode="constant",
-            constant_values=0,
-        )
+        if isinstance(icon_path, list) and len(icon_path) != len(detections):
+            raise ValueError(
+                f"The number of icon paths provided ({len(icon_path)}) does not match "
+                f"the number of detections ({len(detections)}). Either provide a single"
+                f" icon path or one for each detection."
+            )
 
-        xy = detections.get_anchors_coordinates(anchor=self.position)
-        xy += np.array([icon_w, icon_h])
+        xy = detections.get_anchors_coordinates(anchor=self.position).astype(int)
 
         for detection_idx in range(len(detections)):
+            current_path = (
+                icon_path if isinstance(icon_path, str) else icon_path[detection_idx]
+            )
+            if current_path == "":
+                continue
+            icon = self._load_icon(current_path)
+            icon_h, icon_w = icon.shape[:2]
+
             x = int(xy[detection_idx, 0] - icon_w / 2 + self.offset_xy[0])
             y = int(xy[detection_idx, 1] - icon_h / 2 + self.offset_xy[1])
 
-            alpha_channel = self.icon[:, :, 3]
-            mask = alpha_channel != 0
-
-            padded_scene[y : y + icon_h, x : x + icon_w][mask] = self.icon[:, :, :3][
-                mask
-            ]
-
-        padded_scene = padded_scene[icon_h:-icon_h, icon_w:-icon_w]
-        np.copyto(scene, padded_scene)
+            scene[:] = overlay_image(scene, icon, (x, y))
         return scene
 
+    @lru_cache
+    def _load_icon(self, icon_path: str) -> np.ndarray:
+        icon = cv2.imread(icon_path, cv2.IMREAD_UNCHANGED)
+        if icon is None:
+            raise FileNotFoundError(
+                f"Error: Couldn't load the icon image from {icon_path}"
+            )
+        icon = letterbox_image(image=icon, resolution_wh=self.icon_resolution_wh)
+        return icon
+
 
 class BlurAnnotator(BaseAnnotator):
     """

diff --git a/supervision/utils/image.py b/supervision/utils/image.py
@@ -270,6 +270,7 @@ def letterbox_image(
 
     ![letterbox_image](https://media.roboflow.com/supervision-docs/letterbox-image.png){ align=center width="800" }
     """  # noqa E501 // docs
+    assert isinstance(image, np.ndarray)
     color = unify_to_bgr(color=color)
     resized_image = resize_image(
         image=image, resolution_wh=resolution_wh, keep_aspect_ratio=True
@@ -279,7 +280,7 @@ def letterbox_image(
     padding_bottom = resolution_wh[1] - height_new - padding_top
     padding_left = (resolution_wh[0] - width_new) // 2
     padding_right = resolution_wh[0] - width_new - padding_left
-    return cv2.copyMakeBorder(
+    image_with_borders = cv2.copyMakeBorder(
         resized_image,
         padding_top,
         padding_bottom,
@@ -289,6 +290,14 @@ def letterbox_image(
         value=color,
     )
 
+    if image.shape[2] == 4:
+        image[:padding_top, :, 3] = 0
+        image[height_new - padding_bottom :, :, 3] = 0
+        image[:, :padding_left, 3] = 0
+        image[:, width_new - padding_right :, 3] = 0
+
+    return image_with_borders
+
 
 def overlay_image(
     image: npt.NDArray[np.uint8],
@@ -341,9 +350,20 @@ def overlay_image(
     crop_x_max = image_width - max((anchor_x + image_width) - scene_width, 0)
     crop_y_max = image_height - max((anchor_y + image_height) - scene_height, 0)
 
-    image[y_min:y_max, x_min:x_max] = overlay[
-        crop_y_min:crop_y_max, crop_x_min:crop_x_max
-    ]
+    if overlay.shape[2] == 4:
+        b, g, r, alpha = cv2.split(
+            overlay[crop_y_min:crop_y_max, crop_x_min:crop_x_max]
+        )
+        alpha = alpha[:, :, None] / 255.0
+        overlay_color = cv2.merge((b, g, r))
+
+        roi = image[y_min:y_max, x_min:x_max]
+        roi[:] = roi * (1 - alpha) + overlay_color * alpha
+        image[y_min:y_max, x_min:x_max] = roi
+    else:
+        image[y_min:y_max, x_min:x_max] = overlay[
+            crop_y_min:crop_y_max, crop_x_min:crop_x_max
+        ]
 
     return image