pyannote · benniekiss · Sep 11, 2024
diff --git a/src/pyannote/audio/utils/signal.py b/src/pyannote/audio/utils/signal.py
@@ -269,7 +269,25 @@ def __call__(self, scores: SlidingWindowFeature) -> Annotation:
         frames = scores.sliding_window
         timestamps = [frames[i].middle for i in range(num_frames)]
 
-        # annotation meant to store 'active' regions
+        if self.onset == self.offset:
+            active = self._opt_binarize(scores, timestamps)
+        else:
+            active = self._binarize(scores, timestamps)
+
+        # because of padding, some active regions might be overlapping: merge them.
+        # also: fill same speaker gaps shorter than min_duration_off
+        if self.pad_offset > 0.0 or self.pad_onset > 0.0 or self.min_duration_off > 0.0:
+            active = active.support(collar=self.min_duration_off)
+
+        # remove tracks shorter than min_duration_on
+        if self.min_duration_on > 0:
+            for segment, track in list(active.itertracks()):
+                if segment.duration < self.min_duration_on:
+                    del active[segment, track]
+
+        return active
+
+    def _binarize(self, scores, timestamps):
         active = Annotation()
         track_generator = string_generator()
 
@@ -282,7 +300,6 @@ def __call__(self, scores: SlidingWindowFeature) -> Annotation:
             is_active = k_scores[0] > self.onset
 
             for t, y in zip(timestamps[1:], k_scores[1:]):
-
                 # currently active
                 if is_active:
                     # switching from active to inactive
@@ -304,19 +321,37 @@ def __call__(self, scores: SlidingWindowFeature) -> Annotation:
                 region = Segment(start - self.pad_onset, t + self.pad_offset)
                 active[region, track] = label
 
-        # because of padding, some active regions might be overlapping: merge them.
-        # also: fill same speaker gaps shorter than min_duration_off
-        if self.pad_offset > 0.0 or self.pad_onset > 0.0 or self.min_duration_off > 0.0:
-            active = active.support(collar=self.min_duration_off)
+        return active
 
-        # remove tracks shorter than min_duration_on
-        if self.min_duration_on > 0:
-            for segment, track in list(active.itertracks()):
-                if segment.duration < self.min_duration_on:
-                    del active[segment, track]
+    def _opt_binarize(self, scores, timestamps):
+        active = Annotation()
 
-        return active
+        for k, k_scores in enumerate(scores.data.T):
+            label = k if scores.labels is None else scores.labels[k]
+
+            # Detect transitions
+            is_active = k_scores > self.onset
+            transitions = np.diff(is_active.astype(int))
+            starts = np.where(transitions == 1)[0] + 1
+            ends = np.where(transitions == -1)[0] + 1
 
+            # If the first frame is active, add it as a start
+            if is_active[0]:
+                starts = np.insert(starts, 0, 0)
+
+            # If the last frame is active, add it as an end
+            if is_active[-1]:
+                ends = np.append(ends, len(is_active) - 1)
+
+            # Create segments
+            for start, end in zip(starts, ends):
+                region = Segment(
+                    timestamps[start] - self.pad_onset,
+                    timestamps[end] + self.pad_offset,
+                )
+                active[region, k] = label
+
+        return active
 
 class Peak:
     """Peak detection