Use motion estimator and mask generator when using videos

Agustín Castro · Agustín Castro · commit c5968a0c05e6 · 2024-02-05T12:50:09.000-03:00
diff --git a/norfair/common_reference_ui.py b/norfair/common_reference_ui.py
@@ -45,7 +45,7 @@ def set_reference(
     reference: str,
     footage: str,
     transformation_getter: TransformationGetter = None,
-    mask_generator=None, 
+    mask_generator=None,
     desired_size=700,
     motion_estimator=None,
 ):
@@ -58,11 +58,11 @@ def set_reference(
         To add a point, just click a pair of points (one from the footage window, and another from the reference window) and select "Add"
         To remove a point, just select the corresponding point at the bottom left corner, and select "Remove".
 
-        If either footage or reference are videos, you can jump to future frames to pick points that match. 
+        If either the footage or the reference is a video, you can jump to future frames to pick points that match.
         For example, to jump 215 frames in the footage, just write an integer number of frames to jump next to 'Frames to skip (footage)', and select "Skip frames".
         A motion estimator can be used to relate the coordinates of the current frame you see (in either footage or reference) to coordinates in its corresponding first frame.
 
-        Once a transformation has been estimated, you can test it: 
+        Once a transformation has been estimated, you can test it:
         To Test your transformation, Select the 'Test' mode, and pick a point in either the reference or the footage, and see the associated point in the other window.
         You can keep adding more associated points until you are satisfied with the estimated transformation
 
@@ -74,16 +74,16 @@ def set_reference(
         Path to the footage image or video
 
      - transformation_getter: TransformationGetter, optional
-        TransformationGetter defining the type of transformation you want to fix between reference and footage. 
-        Since the transformation can be really far from identity (given that the perspectives in footage and reference can be immensely different), 
+        TransformationGetter defining the type of transformation you want to fix between reference and footage.
+        Since the transformation can be really far from the identity (given that the perspectives in footage and reference can be immensely different),
         and also knowing that outliers shouldn't be common given that a human is picking the points, it is recommended to use a high ransac_reproj_threshold (~ 1000)
 
-     - mask_generator: optional function that creates a mask (np.ndarray) from a PIL image. This mask is then provided to the corresponding MotionEstimator to avoid 
+     - mask_generator: optional function that creates a mask (np.ndarray) from a PIL image. This mask is then provided to the corresponding MotionEstimator to avoid
         sampling points within the mask.
 
      - desired_size: int, optional
         How large you want the clickable windows in the UI to be.
-    
+
      - motion_estimator: MotionEstimator, optional
         When using videos for either the footage or the reference, you can provide a MotionEstimator to relate the coordinates in all the frames in the video.
         The motion estimator is only useful if the camera in either the video of the footage or the video of the reference can move. Otherwise, avoid using it.
@@ -176,15 +176,22 @@ def estimate_transformation(points):
             return None
 
     def test_transformation(
-        change_of_coordinates, canvas, point, original_size, canvas_size, motion_transformation=None,
+        change_of_coordinates,
+        canvas,
+        point,
+        original_size,
+        canvas_size,
+        motion_transformation=None,
     ):
         point_in_new_coordinates = change_of_coordinates(np.array([point]))[0]
 
         try:
-            point_in_new_coordinates = motion_transformation.abs_to_rel(np.array([point_in_new_coordinates]))[0]
+            point_in_new_coordinates = motion_transformation.abs_to_rel(
+                np.array([point_in_new_coordinates])
+            )[0]
         except AttributeError:
             pass
-            
+
         point_in_canvas_coordinates = np.multiply(
             point_in_new_coordinates,
             np.array(
@@ -256,37 +263,46 @@ def handle_annotation(event):
             global reference_canvas_size
             global footage_original_size
             global footage_canvas_size
-            
+
             points[key]["marked"] = not points[key]["marked"]
 
             if points[key]["marked"]:
                 points[key]["button"].configure(fg="black", highlightbackground="red")
 
                 try:
-                    footage_point_in_rel_coords = skipper["footage"]["motion_transformation"].abs_to_rel(np.array([points[key]["footage"]]))[0]
+                    footage_point_in_rel_coords = skipper["footage"][
+                        "motion_transformation"
+                    ].abs_to_rel(np.array([points[key]["footage"]]))[0]
                     footage_point_in_rel_coords = np.multiply(
                         footage_point_in_rel_coords,
                         np.array(
-                            [footage_canvas_size[0] / footage_original_size[0], footage_canvas_size[1] / footage_original_size[1]]
+                            [
+                                footage_canvas_size[0] / footage_original_size[0],
+                                footage_canvas_size[1] / footage_original_size[1],
+                            ]
                         ),
                     ).astype(int)
                 except AttributeError:
                     footage_point_in_rel_coords = points[key]["footage_canvas"]
                     pass
-                
+
                 try:
-                    reference_point_in_rel_coords = skipper["reference"]["motion_transformation"].abs_to_rel(np.array([points[key]["footage"]]))[0]
+                    reference_point_in_rel_coords = skipper["reference"][
+                        "motion_transformation"
+                    ].abs_to_rel(np.array([points[key]["footage"]]))[0]
                     reference_point_in_rel_coords = np.multiply(
                         reference_point_in_rel_coords,
                         np.array(
-                            [reference_canvas_size[0] / reference_original_size[0], reference_canvas_size[1] / reference_original_size[1]]
+                            [
+                                reference_canvas_size[0] / reference_original_size[0],
+                                reference_canvas_size[1] / reference_original_size[1],
+                            ]
                         ),
                     ).astype(int)
                 except AttributeError:
                     reference_point_in_rel_coords = points[key]["reference_canvas"]
                     pass
 
-                
                 draw_point_in_canvas(
                     canvas_footage, footage_point_in_rel_coords, color="red"
                 )
@@ -348,7 +364,9 @@ def handle_annotation(event):
                 mask = mask_generator(image)
             else:
                 mask = None
-            motion_transformation = motion_estimator_footage.update(np.array(image), mask)
+            motion_transformation = motion_estimator_footage.update(
+                np.array(image), mask
+            )
 
     footage_original_width = image.width
     footage_original_height = image.height
@@ -379,14 +397,20 @@ def reference_coord_chosen_in_footage(event):
         footage_point_canvas = (event.x, event.y)
         draw_point_in_canvas(canvas_footage, footage_point_canvas)
 
-
         footage_point = np.array(
-            [event.x * (footage_original_width / footage_canvas_width), event.y * (footage_original_height / footage_canvas_height)]
+            [
+                event.x * (footage_original_width / footage_canvas_width),
+                event.y * (footage_original_height / footage_canvas_height),
+            ]
         )
         print("Footage window clicked at: ", footage_point.round(1))
 
         try:
-            footage_point = skipper["footage"]["motion_transformation"].rel_to_abs(np.array([footage_point]))[0].round(1)
+            footage_point = (
+                skipper["footage"]["motion_transformation"]
+                .rel_to_abs(np.array([footage_point]))[0]
+                .round(1)
+            )
         except AttributeError:
             pass
 
@@ -420,7 +444,6 @@ def reference_coord_chosen_in_footage(event):
         "current_frame_label": None,
     }
 
-
     motion_estimator_reference = None
     motion_transformation = None
     try:
@@ -467,18 +490,23 @@ def reference_coord_chosen_in_reference(event):
         global footage_canvas_size
         global skipper
 
-
-
         reference_point_canvas = (event.x, event.y)
         draw_point_in_canvas(canvas_reference, reference_point_canvas)
 
         reference_point = np.array(
-            [event.x * (reference_original_width / reference_canvas_width), event.y * (reference_original_height / reference_canvas_height)]
+            [
+                event.x * (reference_original_width / reference_canvas_width),
+                event.y * (reference_original_height / reference_canvas_height),
+            ]
         )
         print("Reference window clicked at: ", reference_point.round(1))
 
         try:
-            reference_point = skipper["reference"]["motion_transformation"].rel_to_abs(np.array([reference_point]))[0].round(1)
+            reference_point = (
+                skipper["reference"]["motion_transformation"]
+                .rel_to_abs(np.array([reference_point]))[0]
+                .round(1)
+            )
         except AttributeError:
             pass
 
@@ -560,8 +588,10 @@ def handle_skip_frame(event):
                         mask = mask_generator(image)
                     else:
                         mask = None
-                    motion_transformation = motion_estimator.update(np.array(image), mask)
-            
+                    motion_transformation = motion_estimator.update(
+                        np.array(image), mask
+                    )
+
             skipper[video_type]["motion_estimator"] = motion_estimator
             skipper[video_type]["motion_transformation"] = motion_transformation