-
-
Notifications
You must be signed in to change notification settings - Fork 138
Expand file tree
/
Copy pathtest_transcriber.py
More file actions
63 lines (50 loc) · 2.39 KB
/
test_transcriber.py
File metadata and controls
63 lines (50 loc) · 2.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import unittest
from pathlib import Path
from unittest.mock import Mock
from src.transcriber import WhisperTranscriber
class WhisperTranscriberAutoLanguageTests(unittest.TestCase):
    """Check how ``_transcribe_whisper_cpp`` chooses the transcription language.

    Each test drives the method with a ``Mock`` in place of the model and then
    inspects both the returned dict (``"text"``, ``"detected_language"``) and
    the keyword arguments that were passed to ``model.transcribe``.
    """

    def _build_transcriber(self, model: Mock) -> WhisperTranscriber:
        """Return a ``WhisperTranscriber`` wired to *model* for the whisper.cpp path.

        The instance is allocated with ``__new__`` so ``__init__`` never runs;
        only the attributes these tests rely on are assigned by hand.
        """
        stub = WhisperTranscriber.__new__(WhisperTranscriber)
        stub.model = model
        stub.backend = "whisper.cpp"
        # Replace the conversion step with a canned (path, number) pair so the
        # tests never touch a real audio file.
        # NOTE(review): 12.3 is presumably a duration in seconds — confirm
        # against the implementation.
        stub._convert_to_16khz = Mock(
            return_value=(Path("/tmp/stenoai-test.wav"), 12.3),
        )
        return stub

    def test_auto_mode_uses_detected_language(self):
        # Detection succeeds and reports Dutch as the top candidate.
        fake_model = Mock()
        fake_model.auto_detect_language.return_value = (("nl", 0.97), {"nl": 0.97})
        fake_model.transcribe.return_value = [Mock(text=" Hallo ")]

        outcome = self._build_transcriber(fake_model)._transcribe_whisper_cpp(
            Path("/tmp/stenoai-test.wav"),
            language="auto",
        )

        # Surrounding whitespace of the segment text is expected to be removed.
        self.assertEqual(outcome["text"], "Hallo")
        self.assertEqual(outcome["detected_language"], "nl")
        # The detected code must be handed on to the actual transcription call.
        forwarded_kwargs = fake_model.transcribe.call_args.kwargs
        self.assertEqual(forwarded_kwargs.get("language"), "nl")

    def test_auto_mode_falls_back_when_detection_fails(self):
        # Detection blows up; transcription is still expected to go ahead.
        fake_model = Mock()
        fake_model.auto_detect_language.side_effect = RuntimeError("detection failed")
        fake_model.transcribe.return_value = [Mock(text=" Hello ")]

        outcome = self._build_transcriber(fake_model)._transcribe_whisper_cpp(
            Path("/tmp/stenoai-test.wav"),
            language="auto",
        )

        self.assertEqual(outcome["text"], "Hello")
        self.assertIsNone(outcome["detected_language"])
        # With no detection result, no ``language`` keyword may be passed at all.
        forwarded_kwargs = fake_model.transcribe.call_args.kwargs
        self.assertNotIn("language", forwarded_kwargs)

    def test_explicit_language_skips_auto_detection(self):
        # The caller names the language, so detection must not be consulted.
        fake_model = Mock()
        fake_model.transcribe.return_value = [Mock(text=" Bonjour ")]
        subject = self._build_transcriber(fake_model)

        outcome = subject._transcribe_whisper_cpp(Path("/tmp/stenoai-test.wav"), language="fr")

        self.assertEqual(outcome["text"], "Bonjour")
        self.assertIsNone(outcome["detected_language"])
        fake_model.auto_detect_language.assert_not_called()
        # The explicit code is forwarded unchanged to the transcription call.
        forwarded_kwargs = fake_model.transcribe.call_args.kwargs
        self.assertEqual(forwarded_kwargs.get("language"), "fr")
# Let the module be executed directly as a script in addition to being
# collected by a test runner; the guard keeps imports free of side effects.
if __name__ == "__main__":
    unittest.main()