Skip to content

Commit 7f271df

Browse files
authoredJan 29, 2025
add FasterWhisper as new default provider in favor of whispercpp (#279)
* implement FasterWhisper as first-class STT provider * untie whispercpp in favor of fasterwhisper * add OS acknowledgement
1 parent 18518fe commit 7f271df

17 files changed

+518
-278
lines changed
 

‎README.md

+6-3
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ Since version 2.0, Wingman AI Core acts as a "backend" API (using FastAPI and Py
4141
- OpenAI Whisper
4242
- Azure Whisper
4343
- Azure Speech
44-
- whispercpp (local, bundled with Wingman AI)
44+
- whispercpp (local, needs to be installed separately)
45+
- FasterWhisper (local, default)
4546
- Wingman Pro (Azure Speech or Azure Whisper)
4647
- **Text-to-speech** (TTS) providers:
4748
- OpenAI TTS
@@ -138,9 +139,9 @@ You can use any LLM offering an OpenAI-compatible API and connect it to Wingman
138139
### Windows
139140

140141
- Download the installer of the latest version from [wingman-ai.com](https://www.wingman-ai.com).
142+
- If you have an NVIDIA RTX GPU, install the latest [CUDA driver](https://developer.nvidia.com/cuda-downloads) from NVIDIA to speed up the transcription process significantly.
141143
- Install it to a directory of your choice and start the client `Wingman AI.exe`.
142144
- The client will auto-start `Wingman AI Core.exe` in the background
143-
- The client will auto-start `whispercpp` in the background. If you have an NVIDIA RTX GPU, install the latest CUDA driver from NVIDIA and enable GPU acceleration in the Settings view.
144145

145146
If that doesn't work for some reason, try starting `Wingman AI Core.exe` manually and check the terminal or your **logs** directory for errors.
146147

@@ -271,7 +272,9 @@ We would like to thank their creators for their great work and contributions to
271272
- [scipy](https://github.com/scipy/scipy) - BSD 3, © 2001-2002 Enthought, Inc. 2003-2023, SciPy Developers
272273
- [sounddevice](https://github.com/spatialaudio/python-sounddevice/) - MIT, © 2015-2023 Matthias Geier
273274
- [soundfile](https://github.com/bastibe/python-soundfile) - BSD 3, © 2013 Bastian Bechtold
274-
- [uvicorn](https://github.com/encode/uvicorn) - BSD 3, © 2017-presen, Encode OSS Ltd. All rights reserved.
275+
- [uvicorn](https://github.com/encode/uvicorn) - BSD 3, © 2017-present, Encode OSS Ltd. All rights reserved.
276+
- [whispercpp](https://github.com/ggerganov/whisper.cpp) - MIT, © 2023-2024 The ggml authors
277+
- [FasterWhisper](https://github.com/SYSTRAN/faster-whisper) - MIT, © 2023 SYSTRAN
275278

276279
### Individual persons
277280

‎api/enums.py

+7
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ class MistralModel(Enum):
7979
MISTRAL_MEDIUM = "mistral-medium-latest"
8080
MISTRAL_LARGE = "mistral-large-latest"
8181

82+
8283
class PerplexityModel(Enum):
8384
"""https://docs.perplexity.ai/guides/model-cards"""
8485

@@ -90,6 +91,7 @@ class PerplexityModel(Enum):
9091
LLAMA3_8B = "llama-3.1-8b-instruct"
9192
LLAMA3_70B = "llama-3.1-70b-instruct"
9293

94+
9395
class GoogleAiModel(Enum):
9496
GEMINI_1_5_FLASH = "gemini-1.5-flash"
9597
GEMINI_1_5_PRO = "gemini-1.5-pro"
@@ -136,13 +138,15 @@ class SttProvider(Enum):
136138
AZURE = "azure"
137139
AZURE_SPEECH = "azure_speech"
138140
WHISPERCPP = "whispercpp"
141+
FASTER_WHISPER = "fasterwhisper"
139142
WINGMAN_PRO = "wingman_pro"
140143

141144

142145
class VoiceActivationSttProvider(Enum):
143146
OPENAI = "openai"
144147
AZURE = "azure"
145148
WHISPERCPP = "whispercpp"
149+
FASTER_WHISPER = "fasterwhisper"
146150
WINGMAN_PRO = "wingman_pro"
147151

148152

@@ -175,6 +179,7 @@ class RecordingDevice(Enum):
175179
MOUSE = "mouse"
176180
JOYSTICK = "joystick"
177181

182+
178183
class WingmanProRegion(Enum):
179184
EUROPE = "europe"
180185
USA = "usa"
@@ -246,9 +251,11 @@ class TtsVoiceGenderEnumModel(BaseEnumModel):
246251
class MistralModelEnumModel(BaseEnumModel):
247252
model: MistralModel
248253

254+
249255
class PerplexityModelEnumModel(BaseEnumModel):
250256
model: PerplexityModel
251257

258+
252259
class GoogleAiModelEnumModel(BaseEnumModel):
253260
model: GoogleAiModel
254261

‎api/interface.py

+32-4
Original file line numberDiff line numberDiff line change
@@ -111,12 +111,19 @@ class AudioSettings(BaseModel):
111111

112112

113113
class WhispercppSettings(BaseModel):
114+
enable: bool
114115
host: str
115116
port: int
116-
model: str
117-
language: str
118-
translate_to_english: bool
119-
use_cuda: bool
117+
118+
119+
class FasterWhisperSettings(BaseModel):
120+
model_config = ConfigDict(protected_namespaces=())
121+
"""tiny, tiny.en, base, base.en, small, small.en, distil-small.en, medium, medium.en, distil-medium.en, large-v1, large-v2, large-v3, large, distil-large-v2, distil-large-v3, large-v3-turbo, or turbo"""
122+
model_size: str
123+
"""default (model original), auto (fastest available on device), int8, int8_float16 etc. - see https://opennmt.net/CTranslate2/quantization.html#quantize-on-model-conversion"""
124+
compute_type: str
125+
"""cpu, cuda, auto"""
126+
device: str
120127

121128

122129
class XVASynthSettings(BaseModel):
@@ -133,9 +140,25 @@ class WhispercppSttConfig(BaseModel):
133140
temperature: float
134141

135142

143+
class FasterWhisperSttConfig(BaseModel):
144+
beam_size: int
145+
language: Optional[str] = None
146+
hotwords: Optional[str] = None
147+
best_of: int
148+
temperature: float
149+
no_speech_threshold: float
150+
multilingual: bool
151+
language_detection_threshold: float
152+
153+
136154
class WhispercppTranscript(BaseModel):
137155
text: str
156+
157+
158+
class FasterWhisperTranscript(BaseModel):
159+
text: str
138160
language: str
161+
language_probability: float
139162

140163

141164
class AzureInstanceConfig(BaseModel):
@@ -387,7 +410,9 @@ class VoiceActivationSettings(BaseModel):
387410

388411
azure: AzureSttConfig
389412
whispercpp: WhispercppSettings
413+
fasterwhisper: FasterWhisperSettings
390414
whispercpp_config: WhispercppSttConfig
415+
fasterwhisper_config: FasterWhisperSttConfig
391416

392417

393418
class FeaturesConfig(BaseModel):
@@ -470,6 +495,7 @@ class CommandJoystickConfig(BaseModel):
470495
guid: Optional[str] = None
471496
"""The joystick GUID to use. Optional."""
472497

498+
473499
class CommandActionConfig(BaseModel):
474500
keyboard: Optional[CommandKeyboardConfig] = None
475501
"""The keyboard configuration for this action. Optional."""
@@ -618,6 +644,7 @@ class NestedConfig(BaseModel):
618644
azure: AzureConfig
619645
xvasynth: XVASynthTtsConfig
620646
whispercpp: WhispercppSttConfig
647+
fasterwhisper: FasterWhisperSttConfig
621648
wingman_pro: WingmanProConfig
622649
perplexity: PerplexityConfig
623650
commands: Optional[list[CommandConfig]] = None
@@ -650,6 +677,7 @@ class BasicWingmanConfig(BaseModel):
650677
azure: AzureConfig
651678
xvasynth: XVASynthTtsConfig
652679
whispercpp: WhispercppSttConfig
680+
fasterwhisper: FasterWhisperSttConfig
653681
wingman_pro: WingmanProConfig
654682
perplexity: PerplexityConfig
655683

‎providers/faster_whisper.py

+76
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
from faster_whisper import WhisperModel
2+
from api.interface import (
3+
FasterWhisperSettings,
4+
FasterWhisperTranscript,
5+
FasterWhisperSttConfig,
6+
WingmanInitializationError,
7+
)
8+
from services.printr import Printr
9+
10+
11+
class FasterWhisper:
    """Local speech-to-text provider built on a faster-whisper ``WhisperModel``.

    The model is loaded when the provider is created and is only reloaded by
    ``update_settings`` when the model size, device or compute type changes.
    """

    def __init__(
        self,
        settings: FasterWhisperSettings,
    ):
        """Store *settings* and load the model they describe.

        Args:
            settings: Provides ``model_size``, ``device`` and ``compute_type``
                for the ``WhisperModel`` constructor.
        """
        self.settings = settings
        self.current_model = None
        self.printr = Printr()
        self._load_model(settings)

    def _load_model(self, settings: FasterWhisperSettings):
        """Create ``self.model`` from *settings* and remember what was loaded."""
        self.model = WhisperModel(
            settings.model_size,
            device=settings.device,
            compute_type=settings.compute_type,
        )
        # Keep the values (not the settings object) so a later comparison still
        # works if the caller hands us the same, mutated settings instance.
        self._loaded_params = (
            settings.model_size,
            settings.device,
            settings.compute_type,
        )

    def transcribe(
        self,
        config: FasterWhisperSttConfig,
        wingman_name: str,
        filename: str,
    ):
        """Transcribe the audio file *filename*.

        Args:
            config: Decoding options (beam size, temperature, language, ...).
            wingman_name: Always passed as a hotword; ``config.hotwords`` is
                appended after it (comma-separated) when set.
            filename: Path of the audio file to transcribe.

        Returns:
            A ``FasterWhisperTranscript`` with the text, detected language and
            its probability, or ``None`` if the file does not exist (an error
            toast is shown in that case).
        """
        hotwords = (
            ",".join([wingman_name, config.hotwords])
            if config.hotwords
            else wingman_name
        )
        try:
            segments, info = self.model.transcribe(
                filename,
                without_timestamps=True,
                beam_size=config.beam_size,
                best_of=config.best_of,
                temperature=config.temperature,
                hotwords=hotwords,
                no_speech_threshold=config.no_speech_threshold,
                language=config.language,
                # A fixed language makes per-segment language detection
                # pointless, so both related options are switched off then.
                multilingual=False if config.language else config.multilingual,
                language_detection_threshold=(
                    None if config.language else config.language_detection_threshold
                ),
            )
            # Concatenate first and strip once: this keeps whatever whitespace
            # the model emitted between segments. Stripping every segment
            # before concatenating glued the last word of one segment to the
            # first word of the next.
            text = "".join(segment.text for segment in segments).strip()

            return FasterWhisperTranscript(
                text=text,
                language=info.language,
                language_probability=info.language_probability,
            )

        except FileNotFoundError:
            self.printr.toast_error(
                f"FasterWhisper file to transcript'{filename}' not found."
            )
            return None

    def update_settings(self, settings: FasterWhisperSettings):
        """Apply new settings, reloading the model only when it is affected.

        Args:
            settings: The new settings. The model is rebuilt only if its
                ``model_size``, ``device`` or ``compute_type`` differ from the
                values the current model was loaded with.
        """
        self.settings = settings
        requested_params = (
            settings.model_size,
            settings.device,
            settings.compute_type,
        )
        # Loading a Whisper model is expensive; skip it when nothing relevant
        # changed (this method is called on every settings save).
        if requested_params != getattr(self, "_loaded_params", None):
            self._load_model(settings)
        self.printr.print("FasterWhisper settings updated.", server_only=True)

    def validate(self, errors: list[WingmanInitializationError]):
        """Validation hook; currently performs no checks and adds no errors."""
        pass

‎providers/whispercpp.py

+25-153
Original file line numberDiff line numberDiff line change
@@ -1,50 +1,34 @@
1-
from time import sleep
2-
from os import path
3-
import platform
4-
import subprocess
51
import requests
6-
from api.enums import LogType
7-
from api.interface import WhispercppSettings, WhispercppSttConfig, WhispercppTranscript
2+
from api.enums import LogType, WingmanInitializationErrorType
3+
from api.interface import (
4+
WhispercppSettings,
5+
WhispercppSttConfig,
6+
WhispercppTranscript,
7+
WingmanInitializationError,
8+
)
89
from services.printr import Printr
910

10-
STANDARD_DIR = "whispercpp"
11-
CUDA_DIR = "whispercpp-cuda"
12-
MODELS_DIR = "whispercpp-models"
13-
SERVER_EXE = "whisper-server.exe"
14-
1511

1612
class Whispercpp:
1713
def __init__(
1814
self,
1915
settings: WhispercppSettings,
20-
app_root_path: str,
21-
app_is_bundled: bool,
2216
):
2317
self.settings = settings
24-
self.current_model = None
25-
self.running_process = None
2618
self.printr = Printr()
2719

28-
self.is_windows = platform.system() == "Windows"
29-
if self.is_windows:
30-
# move one dir up, out of _internal (if bundled)
31-
app_dir = path.dirname(app_root_path) if app_is_bundled else app_root_path
32-
self.models_dir = path.join(app_dir, MODELS_DIR)
33-
self.standard_dir = path.join(app_dir, STANDARD_DIR)
34-
self.cuda_dir = path.join(app_dir, CUDA_DIR)
35-
36-
if self.__validate():
37-
self.start_server()
38-
else:
39-
self.__validate()
40-
4120
def transcribe(
4221
self,
4322
filename: str,
4423
config: WhispercppSttConfig,
4524
response_format: str = "json",
4625
timeout: int = 10,
4726
):
27+
if not self.settings.enable:
28+
self.printr.toast_error(
29+
text="Whispercpp must be enabled and configured in the Settings view."
30+
)
31+
return None
4832
try:
4933
with open(filename, "rb") as file:
5034
response = requests.post(
@@ -60,7 +44,6 @@ def transcribe(
6044
# Wrap response.json = {"text":"transcription"} into a Pydantic model for typesafe further processing
6145
return WhispercppTranscript(
6246
text=response.json()["text"].strip(),
63-
language=self.settings.language,
6447
)
6548
except requests.HTTPError as e:
6649
self.printr.toast_error(
@@ -77,136 +60,25 @@ def transcribe(
7760
f"whispercpp file to transcript'{filename}' not found."
7861
)
7962

80-
def start_server(self):
81-
if self.__is_server_running() or not self.is_windows:
82-
self.printr.print(
83-
f"whispercpp connected on {self.settings.host}:{self.settings.port}. Using model '{self.settings.model}' and language '{self.settings.language}'.",
84-
server_only=True,
85-
color=LogType.HIGHLIGHT,
86-
)
87-
return True
88-
89-
args = [
90-
path.join(
91-
self.cuda_dir if self.settings.use_cuda else self.standard_dir,
92-
SERVER_EXE,
93-
),
94-
"--port",
95-
str(self.settings.port),
96-
"-m",
97-
path.join(self.models_dir, self.settings.model),
98-
"-l",
99-
self.settings.language,
100-
]
101-
if self.settings.translate_to_english:
102-
args.append("-tr")
103-
104-
try:
105-
self.stop_server()
106-
self.running_process = subprocess.Popen(args)
107-
self.current_model = self.settings.model
108-
sleep(2)
109-
is_running = self.__is_server_running()
110-
if is_running:
111-
self.printr.print(
112-
f"whispercpp server started on {self.settings.host}:{self.settings.port}.",
113-
server_only=True,
114-
color=LogType.HIGHLIGHT,
115-
)
116-
else:
117-
self.printr.toast_error(
118-
text="Failed to start whispercpp server. Please start it manually."
119-
)
120-
return is_running
121-
except Exception:
122-
self.printr.toast_error(
123-
text="Failed to start whispercpp server. Please start it manually."
124-
)
125-
return False
126-
127-
def stop_server(self):
128-
if self.running_process:
129-
self.running_process.kill()
130-
self.running_process.wait()
131-
self.running_process = None
132-
self.printr.print(
133-
"whispercpp server stopped.", server_only=True, color=LogType.HIGHLIGHT
134-
)
135-
13663
def update_settings(self, settings: WhispercppSettings):
137-
requires_restart = (
138-
self.settings.host != settings.host
139-
or self.settings.port != settings.port
140-
or self.settings.use_cuda != settings.use_cuda
141-
or self.settings.language != settings.language
142-
or self.settings.translate_to_english != settings.translate_to_english
143-
)
144-
if self.__validate():
145-
self.settings = settings
146-
147-
if requires_restart:
148-
self.stop_server()
149-
self.start_server()
150-
else:
151-
self.change_model()
152-
153-
self.printr.print("whispercpp settings updated.", server_only=True)
154-
155-
def change_model(self, timeout=10):
156-
if not self.is_windows:
157-
return
158-
159-
if self.current_model != self.settings.model:
160-
response = requests.post(
161-
f"{self.settings.host}:{self.settings.port}/load",
162-
data={"model": path.join(self.models_dir, self.settings.model)},
163-
timeout=timeout,
164-
)
165-
response.raise_for_status()
166-
self.current_model = self.settings.model
167-
168-
def __validate(self):
169-
if not self.is_windows:
170-
if not self.__is_server_running():
171-
self.printr.print(
172-
text=f"Please start whispercpp server manually on {self.settings.host}:{self.settings.port}.",
173-
color=LogType.ERROR,
174-
server_only=True,
175-
)
176-
return False
177-
else:
178-
self.printr.print(
179-
text=f"whispercpp connected on {self.settings.host}:{self.settings.port} (manually started on non-Windows OS).",
180-
color=LogType.HIGHLIGHT,
181-
server_only=True,
64+
self.settings = settings
65+
self.printr.print("whispercpp settings updated.", server_only=True)
66+
67+
def validate(self, wingman_name: str, errors: list[WingmanInitializationError]):
68+
if not self.__is_server_running():
69+
errors.append(
70+
WingmanInitializationError(
71+
wingman_name=wingman_name,
72+
message=f"Please start whispercpp server manually on {self.settings.host}:{self.settings.port}, then restart Wingman AI.",
73+
error_type=WingmanInitializationErrorType.INVALID_CONFIG,
18274
)
183-
return True
184-
185-
# On Windows:
186-
model_path = path.join(self.models_dir, self.settings.model)
187-
if not path.exists(model_path):
188-
self.printr.print(
189-
text=f"whispercpp is missing model file '{model_path}'.",
190-
color=LogType.ERROR,
191-
server_only=True,
192-
)
193-
return False
194-
if not path.exists(self.cuda_dir):
195-
self.printr.print(
196-
text=f"whispercpp is missing directory '{self.cuda_dir}'.",
197-
color=LogType.ERROR,
198-
server_only=True,
19975
)
200-
return False
201-
if not path.exists(self.standard_dir):
76+
else:
20277
self.printr.print(
203-
text=f"whispercpp is missing directory '{self.standard_dir}'.",
204-
color=LogType.ERROR,
78+
text=f"whispercpp connected on {self.settings.host}:{self.settings.port}.",
79+
color=LogType.HIGHLIGHT,
20580
server_only=True,
20681
)
207-
return False
208-
209-
return True
21082

21183
def __is_server_running(self, timeout=5):
21284
try:

‎requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ azure-cognitiveservices-speech==1.42.0
22
edge-tts==7.0.0
33
elevenlabslib==0.31.3
44
fastapi==0.115.7
5+
faster_whisper==1.1.1
56
google-generativeai==0.8.4
67
markdown==3.7
78
numpy==1.26.4

‎services/config_manager.py

+1
Original file line numberDiff line numberDiff line change
@@ -952,6 +952,7 @@ def merge_configs(self, default: Config, wingman):
952952
"elevenlabs",
953953
"azure",
954954
"whispercpp",
955+
"fasterwhisper",
955956
"xvasynth",
956957
"wingman_pro",
957958
"perplexity",

‎services/config_migration_service.py

+25-1
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ def migrate_settings(old: dict, new: dict) -> dict:
138138
self.log("- applied new split whispercpp settings/config structure")
139139

140140
old["xvasynth"] = new["xvasynth"]
141-
self.log("- adding new XVASynth settings")
141+
self.log("- added new XVASynth settings")
142142

143143
old.pop("audio", None)
144144
self.log("- removed audio device settings because DirectSound was removed")
@@ -348,12 +348,36 @@ def migrate_wingman(old: dict, new: Optional[dict]) -> dict:
348348

349349
def migrate_162_to_170(self):
350350
def migrate_settings(old: dict, new: dict) -> dict:
351+
old["voice_activation"]["whispercpp"].pop("use_cuda", None)
352+
old["voice_activation"]["whispercpp"].pop("language", None)
353+
old["voice_activation"]["whispercpp"].pop("translate_to_english", None)
354+
self.log("- removed old whispercpp settings (if there were any)")
355+
356+
old["voice_activation"]["whispercpp"]["enable"] = False
357+
self.log("- disabled whispercpp by default")
358+
359+
old["voice_activation"]["fasterwhisper"] = new["voice_activation"][
360+
"fasterwhisper"
361+
]
362+
old["voice_activation"]["fasterwhisper_config"] = new["voice_activation"][
363+
"fasterwhisper_config"
364+
]
365+
self.log("- added new fasterwhisper settings and config")
351366
return old
352367

353368
def migrate_defaults(old: dict, new: dict) -> dict:
369+
old["fasterwhisper"] = new["fasterwhisper"]
370+
self.log("- added new properties: fasterwhisper")
371+
372+
old["features"]["stt_provider"] = "fasterwhisper"
373+
self.log("- made fasterwhisper new default STT provider")
374+
354375
return old
355376

356377
def migrate_wingman(old: dict, new: Optional[dict]) -> dict:
378+
if old.get("features", {}).get("stt_provider") == "whispercpp":
379+
old["features"]["stt_provider"] = "fasterwhisper"
380+
self.log("- changed STT provider from whispercpp to fasterwhisper")
357381
return old
358382

359383
self.migrate(

‎services/config_service.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -370,9 +370,11 @@ async def save_basic_wingman_config(
370370
except ValueError:
371371
wingman_config.azure.tts.voice = basic_config.voice
372372

373-
reload_config = (wingman_config.record_joystick_button != basic_config.record_joystick_button
374-
or wingman_config.record_mouse_button != basic_config.record_mouse_button
375-
or wingman_file.name != wingman_config.name)
373+
reload_config = (
374+
wingman_config.record_joystick_button != basic_config.record_joystick_button
375+
or wingman_config.record_mouse_button != basic_config.record_mouse_button
376+
or wingman_file.name != wingman_config.name
377+
)
376378

377379
wingman_config.record_joystick_button = basic_config.record_joystick_button
378380
wingman_config.record_mouse_button = basic_config.record_mouse_button
@@ -390,6 +392,7 @@ async def save_basic_wingman_config(
390392
wingman_config.azure = basic_config.azure
391393
wingman_config.xvasynth = basic_config.xvasynth
392394
wingman_config.whispercpp = basic_config.whispercpp
395+
wingman_config.fasterwhisper = basic_config.fasterwhisper
393396
wingman_config.wingman_pro = basic_config.wingman_pro
394397
wingman_config.perplexity = basic_config.perplexity
395398

‎services/module_manager.py

+11-5
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
SkillConfig,
1313
WingmanConfig,
1414
)
15+
from providers.faster_whisper import FasterWhisper
1516
from providers.whispercpp import Whispercpp
1617
from providers.xvasynth import XVASynth
1718
from services.audio_library import AudioLibrary
@@ -54,6 +55,7 @@ def create_wingman_dynamically(
5455
audio_player: AudioPlayer,
5556
audio_library: AudioLibrary,
5657
whispercpp: Whispercpp,
58+
fasterwhisper: FasterWhisper,
5759
xvasynth: XVASynth,
5860
tower: "Tower",
5961
):
@@ -66,6 +68,7 @@ def create_wingman_dynamically(
6668
audio_player (AudioPlayer): The audio player handling the playback of audio files.
6769
audio_library (AudioLibrary): The audio library handling the storage and retrieval of audio files.
6870
whispercpp (Whispercpp): The Whispercpp provider for speech-to-text.
71+
fasterwhisper (FasterWhisper): The FasterWhisper provider for speech-to-text.
6972
xvasynth (XVASynth): The XVASynth provider for text-to-speech.
7073
tower (Tower): The Tower instance, that manages loaded Wingmen.
7174
"""
@@ -91,6 +94,7 @@ def create_wingman_dynamically(
9194
audio_player=audio_player,
9295
audio_library=audio_library,
9396
whispercpp=whispercpp,
97+
fasterwhisper=fasterwhisper,
9498
xvasynth=xvasynth,
9599
tower=tower,
96100
)
@@ -151,9 +155,7 @@ def add_to_sys_path(path_to_add: str):
151155

152156
@staticmethod
153157
def read_available_skill_configs() -> list[tuple[str, str]]:
154-
skill_dirs = [
155-
get_writable_dir(SKILLS_DIR)
156-
]
158+
skill_dirs = [get_writable_dir(SKILLS_DIR)]
157159

158160
if os.path.isdir(SKILLS_DIR):
159161
skill_dirs.append(SKILLS_DIR)
@@ -168,12 +170,16 @@ def read_available_skill_configs() -> list[tuple[str, str]]:
168170
# Check if the path is a directory (to avoid non-folder files)
169171
if os.path.isdir(skill_path):
170172
# Construct the path to the default_config.yaml file
171-
default_config_path = os.path.join(skill_path, "default_config.yaml")
173+
default_config_path = os.path.join(
174+
skill_path, "default_config.yaml"
175+
)
172176

173177
# Check if the default_config.yaml file exists
174178
if os.path.isfile(default_config_path):
175179
# Add the skill name and the default_config.yaml file path to the list
176-
skills_default_configs.update({skill_name: (skill_name, default_config_path)})
180+
skills_default_configs.update(
181+
{skill_name: (skill_name, default_config_path)}
182+
)
177183

178184
return list(skills_default_configs.values())
179185

‎services/settings_service.py

+16-1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
AudioDeviceSettings,
99
SettingsConfig,
1010
)
11+
from providers.faster_whisper import FasterWhisper
1112
from providers.whispercpp import Whispercpp
1213
from providers.xvasynth import XVASynth
1314
from services.config_manager import ConfigManager
@@ -25,6 +26,7 @@ def __init__(self, config_manager: ConfigManager, config_service: ConfigService)
2526
self.settings = self.get_settings()
2627
self.settings_events = PubSub()
2728
self.whispercpp: Whispercpp = None
29+
self.fasterwhisper: FasterWhisper = None
2830
self.xvasynth: XVASynth = None
2931

3032
self.router = APIRouter()
@@ -44,8 +46,11 @@ def __init__(self, config_manager: ConfigManager, config_service: ConfigService)
4446
tags=tags,
4547
)
4648

47-
def initialize(self, whispercpp: Whispercpp, xvasynth: XVASynth):
49+
def initialize(
50+
self, whispercpp: Whispercpp, fasterwhisper: FasterWhisper, xvasynth: XVASynth
51+
):
4852
self.whispercpp = whispercpp
53+
self.fasterwhisper = fasterwhisper
4954
self.xvasynth = xvasynth
5055

5156
# GET /settings
@@ -84,6 +89,16 @@ async def save_settings(self, settings: SettingsConfig):
8489
return
8590
self.whispercpp.update_settings(settings=settings.voice_activation.whispercpp)
8691

92+
# FasterWhisper
93+
if not self.fasterwhisper:
94+
self.printr.toast_error(
95+
"FasterWhisper is not initialized. Please run SettingsService.initialize()",
96+
)
97+
return
98+
self.fasterwhisper.update_settings(
99+
settings=settings.voice_activation.fasterwhisper
100+
)
101+
87102
# XVASynth
88103
if not self.xvasynth:
89104
self.printr.toast_error(

‎services/tower.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
WingmanInitializationError,
77
ConfigDirInfo,
88
)
9+
from providers.faster_whisper import FasterWhisper
910
from providers.whispercpp import Whispercpp
1011
from providers.xvasynth import XVASynth
1112
from services.audio_player import AudioPlayer
@@ -29,6 +30,7 @@ def __init__(
2930
audio_player: AudioPlayer,
3031
audio_library: AudioLibrary,
3132
whispercpp: Whispercpp,
33+
fasterwhisper: FasterWhisper,
3234
xvasynth: XVASynth,
3335
):
3436
self.audio_player = audio_player
@@ -40,6 +42,7 @@ def __init__(
4042
self.disabled_wingmen: list[WingmanConfig] = []
4143
self.log_source_name = "Tower"
4244
self.whispercpp = whispercpp
45+
self.fasterwhisper = fasterwhisper
4346
self.xvasynth = xvasynth
4447

4548
async def instantiate_wingmen(self, settings: SettingsConfig):
@@ -94,6 +97,7 @@ async def __instantiate_wingman(
9497
audio_player=self.audio_player,
9598
audio_library=self.audio_library,
9699
whispercpp=self.whispercpp,
100+
fasterwhisper=self.fasterwhisper,
97101
xvasynth=self.xvasynth,
98102
tower=self,
99103
)
@@ -105,6 +109,7 @@ async def __instantiate_wingman(
105109
audio_player=self.audio_player,
106110
audio_library=self.audio_library,
107111
whispercpp=self.whispercpp,
112+
fasterwhisper=self.fasterwhisper,
108113
xvasynth=self.xvasynth,
109114
tower=self,
110115
)
@@ -215,7 +220,9 @@ def disable_wingman(self, wingman_name: str):
215220
def save_wingman(self, wingman_name: str):
216221
for wingman in self.wingmen:
217222
if wingman.name == wingman_name:
218-
for wingman_file in self.config_manager.get_wingmen_configs(self.config_dir):
223+
for wingman_file in self.config_manager.get_wingmen_configs(
224+
self.config_dir
225+
):
219226
if wingman_file.name == wingman_name:
220227
self.config_manager.save_wingman_config(
221228
config_dir=self.config_dir,

‎templates/configs/defaults.yaml

+8-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ prompts:
3939
(END of "skills")
4040
features:
4141
tts_provider: wingman_pro
42-
stt_provider: whispercpp
42+
stt_provider: fasterwhisper
4343
conversation_provider: wingman_pro
4444
image_generation_provider: wingman_pro
4545
use_generic_instant_responses: false
@@ -103,6 +103,13 @@ azure:
103103
- de-DE
104104
whispercpp:
105105
temperature: 0.0
106+
fasterwhisper:
107+
beam_size: 1
108+
best_of: 2
109+
temperature: 0
110+
no_speech_threshold: 0.7
111+
language_detection_threshold: 0.5
112+
multilingual: false
106113
xvasynth:
107114
voice:
108115
model_directory: ""

‎templates/configs/settings.yaml

+12-4
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,20 @@ voice_activation:
1313
whispercpp:
1414
host: http://127.0.0.1
1515
port: 8080
16-
model: ggml-base.bin
17-
language: auto
18-
translate_to_english: false
19-
use_cuda: false
16+
enable: false
2017
whispercpp_config:
2118
temperature: 0.0
19+
fasterwhisper:
20+
model_size: base
21+
device: auto
22+
compute_type: auto
23+
fasterwhisper_config:
24+
beam_size: 1
25+
best_of: 2
26+
temperature: 0
27+
no_speech_threshold: 0.7
28+
language_detection_threshold: 0.5
29+
multilingual: false
2230
wingman_pro:
2331
base_url: https://wingman-ai.azurewebsites.net
2432
region: europe

‎wingman_core.py

+118-57
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
import os
44
import re
55
import threading
6-
import pygame
76
from typing import Optional
7+
import pygame
88
from fastapi import APIRouter, File, UploadFile
99
import requests
1010
import sounddevice as sd
@@ -31,6 +31,7 @@
3131
WingmanInitializationError,
3232
)
3333
from providers.elevenlabs import ElevenLabs
34+
from providers.faster_whisper import FasterWhisper
3435
from providers.open_ai import OpenAi
3536
from providers.whispercpp import Whispercpp
3637
from providers.wingman_pro import WingmanPro
@@ -117,22 +118,24 @@ def __init__(
117118
tags=tags,
118119
)
119120
self.router.add_api_route(
120-
methods=["POST"],
121-
path="/whispercpp/start",
122-
endpoint=self.start_whispercpp,
121+
methods=["GET"],
122+
path="/fasterwhisper/modelsizes",
123+
response_model=list[str],
124+
endpoint=self.get_fasterwhisper_modelsizes,
123125
tags=tags,
124126
)
125127
self.router.add_api_route(
126-
methods=["POST"],
127-
path="/whispercpp/stop",
128-
endpoint=self.stop_whispercpp,
128+
methods=["GET"],
129+
path="/fasterwhisper/computetypes",
130+
response_model=list[str],
131+
endpoint=self.get_fasterwhisper_computetypes,
129132
tags=tags,
130133
)
131134
self.router.add_api_route(
132135
methods=["GET"],
133-
path="/whispercpp/models",
136+
path="/fasterwhisper/devices",
134137
response_model=list[str],
135-
endpoint=self.get_whispercpp_models,
138+
endpoint=self.get_fasterwhisper_devices,
136139
tags=tags,
137140
)
138141
self.router.add_api_route(
@@ -300,12 +303,15 @@ def __init__(
300303

301304
self.whispercpp = Whispercpp(
302305
settings=self.settings_service.settings.voice_activation.whispercpp,
303-
app_root_path=app_root_path,
304-
app_is_bundled=app_is_bundled,
306+
)
307+
self.fasterwhisper = FasterWhisper(
308+
settings=self.settings_service.settings.voice_activation.fasterwhisper,
305309
)
306310
self.xvasynth = XVASynth(settings=self.settings_service.settings.xvasynth)
307311
self.settings_service.initialize(
308-
whispercpp=self.whispercpp, xvasynth=self.xvasynth
312+
whispercpp=self.whispercpp,
313+
fasterwhisper=self.fasterwhisper,
314+
xvasynth=self.xvasynth,
309315
)
310316

311317
self.voice_service = VoiceService(
@@ -331,20 +337,35 @@ async def startup(self):
331337
await self.set_voice_activation(is_enabled=True)
332338

333339
def is_mouse_configured(self, config: Config) -> bool:
334-
return any(config.wingmen[wingman].record_mouse_button for wingman in config.wingmen)
335-
340+
return any(
341+
config.wingmen[wingman].record_mouse_button for wingman in config.wingmen
342+
)
343+
336344
def is_joystick_configured(self, config: Config) -> bool:
337-
return any(config.wingmen[wingman].record_joystick_button for wingman in config.wingmen)
338-
345+
return any(
346+
config.wingmen[wingman].record_joystick_button for wingman in config.wingmen
347+
)
348+
339349
async def start_joysticks(self, config: Config):
340350
pygame.init()
341351

342352
# Get all joystick configs
343-
joystick_configs = [config.wingmen[wingman].record_joystick_button for wingman in config.wingmen if config.wingmen[wingman].record_joystick_button]
344-
345-
joysticks = [pygame.joystick.Joystick(x) for x in range(pygame.joystick.get_count())]
353+
joystick_configs = [
354+
config.wingmen[wingman].record_joystick_button
355+
for wingman in config.wingmen
356+
if config.wingmen[wingman].record_joystick_button
357+
]
358+
359+
joysticks = [
360+
pygame.joystick.Joystick(x) for x in range(pygame.joystick.get_count())
361+
]
346362
for joystick in joysticks:
347-
if any([joystick.get_guid() == joystick_config.guid for joystick_config in joystick_configs]):
363+
if any(
364+
[
365+
joystick.get_guid() == joystick_config.guid
366+
for joystick_config in joystick_configs
367+
]
368+
):
348369
joystick.init()
349370

350371
running = True
@@ -356,12 +377,20 @@ async def start_joysticks(self, config: Config):
356377
joystick_origin = pygame.joystick.Joystick(event.joy)
357378
for joystick_config in joystick_configs:
358379
if joystick_origin.get_guid() == joystick_config.guid:
359-
self.on_press(joystick_config=CommandJoystickConfig(guid=joystick_config.guid, button=event.button))
380+
self.on_press(
381+
joystick_config=CommandJoystickConfig(
382+
guid=joystick_config.guid, button=event.button
383+
)
384+
)
360385
elif event.type == pygame.JOYBUTTONUP:
361386
joystick_origin = pygame.joystick.Joystick(event.joy)
362387
for joystick_config in joystick_configs:
363388
if joystick_origin.get_guid() == joystick_config.guid:
364-
self.on_release(joystick_config=CommandJoystickConfig(guid=joystick_config.guid, button=event.button))
389+
self.on_release(
390+
joystick_config=CommandJoystickConfig(
391+
guid=joystick_config.guid, button=event.button
392+
)
393+
)
365394

366395
# Add a small sleep to prevent the loop from consuming too much CPU
367396
await asyncio.sleep(0.01)
@@ -400,6 +429,7 @@ async def initialize_tower(self, config_dir_info: ConfigWithDirInfo):
400429
audio_player=self.audio_player,
401430
audio_library=self.audio_library,
402431
whispercpp=self.whispercpp,
432+
fasterwhisper=self.fasterwhisper,
403433
xvasynth=self.xvasynth,
404434
)
405435
self.tower_errors = await self.tower.instantiate_wingmen(
@@ -432,7 +462,9 @@ def is_hotkey_pressed(self, hotkey: list[int] | str) -> bool:
432462

433463
return is_pressed
434464

435-
def on_press(self, key=None, mouse_button=None, joystick_config: CommandJoystickConfig=None):
465+
def on_press(
466+
self, key=None, mouse_button=None, joystick_config: CommandJoystickConfig = None
467+
):
436468
is_mute_hotkey_pressed = self.is_hotkey_pressed(
437469
self.settings_service.settings.voice_activation.mute_toggle_key_codes
438470
or self.settings_service.settings.voice_activation.mute_toggle_key
@@ -457,17 +489,23 @@ def on_press(self, key=None, mouse_button=None, joystick_config: CommandJoystick
457489
wingman = potential_wingman
458490
break
459491
if joystick_config:
460-
if potential_wingman.get_record_joystick_button() == f"{joystick_config.guid}{joystick_config.button}":
492+
if (
493+
potential_wingman.get_record_joystick_button()
494+
== f"{joystick_config.guid}{joystick_config.button}"
495+
):
461496
wingman = potential_wingman
462497
break
463-
498+
464499
if wingman:
465500
if key:
466501
self.active_recording = dict(key=key.name, wingman=wingman)
467502
elif mouse_button:
468503
self.active_recording = dict(key=mouse_button, wingman=wingman)
469504
elif joystick_config:
470-
self.active_recording = dict(key=f"{joystick_config.guid}{joystick_config.button}", wingman=wingman)
505+
self.active_recording = dict(
506+
key=f"{joystick_config.guid}{joystick_config.button}",
507+
wingman=wingman,
508+
)
471509

472510
self.was_listening_before_ptt = self.is_listening
473511
if (
@@ -478,12 +516,18 @@ def on_press(self, key=None, mouse_button=None, joystick_config: CommandJoystick
478516

479517
self.audio_recorder.start_recording(wingman_name=wingman.name)
480518

481-
def on_release(self, key=None, mouse_button=None, joystick_config: CommandJoystickConfig=None):
519+
def on_release(
520+
self, key=None, mouse_button=None, joystick_config: CommandJoystickConfig = None
521+
):
482522
if self.tower and (
483523
key is not None
484524
and self.active_recording["key"] == key.name
485525
or self.active_recording["key"] == mouse_button
486-
or (joystick_config and self.active_recording["key"] == f"{joystick_config.guid}{joystick_config.button}")
526+
or (
527+
joystick_config
528+
and self.active_recording["key"]
529+
== f"{joystick_config.guid}{joystick_config.button}"
530+
)
487531
):
488532
wingman = self.active_recording["wingman"]
489533
recorded_audio_wav = self.audio_recorder.stop_recording(
@@ -849,34 +893,52 @@ def reset_conversation_history(self, wingman_name: Optional[str] = None):
849893
)
850894
return True
851895

852-
# POST /whispercpp/start
853-
def start_whispercpp(self):
854-
self.whispercpp.start_server()
855-
856-
# POST /whispercpp/stop
857-
def stop_whispercpp(self):
858-
try:
859-
self.whispercpp.stop_server()
860-
except Exception:
861-
pass
862-
863-
# GET /whispercpp/models
864-
def get_whispercpp_models(self):
865-
model_files = []
866-
try:
867-
model_files = [
868-
f
869-
for f in os.listdir(self.whispercpp.models_dir)
870-
if os.path.isfile(os.path.join(self.whispercpp.models_dir, f))
871-
and f.endswith(".bin")
872-
]
873-
except Exception:
874-
# this can fail:
875-
# - on MacOS (always)
876-
# - in Dev mode if the dev hasn't copied the whispercpp-models dir to the repository
877-
# in these cases, we return an empty list and the client will lock the controls and show a warning.
878-
pass
879-
return model_files
896+
# GET /fasterwhisper/modelsizes
897+
def get_fasterwhisper_modelsizes(self) -> list[str]:
    """Return the FasterWhisper model size identifiers offered to clients.

    Serves the ``GET /fasterwhisper/modelsizes`` endpoint. The list is
    hard-coded and ordered roughly from the smallest to the largest model;
    a fresh list is created on every call, so callers may mutate the result
    without affecting later calls.

    Returns:
        The selectable model size names, in display order.
    """
    # NOTE(review): presumably mirrors the model names accepted by the
    # installed faster-whisper version - verify when upgrading the package.
    return [
        "tiny",
        "tiny.en",
        "base",
        "base.en",
        "small",
        "small.en",
        "distil-small.en",
        "medium",
        "medium.en",
        "distil-medium.en",
        "large-v1",
        "large-v2",
        "large-v3",
        "large",
        "distil-large-v2",
        "distil-large-v3",
        "large-v3-turbo",
        "turbo",
    ]
919+
920+
# GET /fasterwhisper/computetypes
921+
def get_fasterwhisper_computetypes(self) -> list[str]:
    """Return the compute type options offered for FasterWhisper.

    Serves the ``GET /fasterwhisper/computetypes`` endpoint. The list is
    hard-coded; a fresh list is created on every call, so callers may
    mutate the result without affecting later calls.

    Returns:
        The selectable compute type names, in display order.
    """
    # NOTE(review): presumably a subset of the quantization types supported
    # by the underlying inference backend - confirm against its docs.
    return [
        "default",
        "auto",
        "int8",
        "int16",
        "int8_float16",
        "int8_float32",
        "float16",
        "float32",
    ]
933+
934+
# GET /fasterwhisper/devices
935+
def get_fasterwhisper_devices(self) -> list[str]:
    """Return the device options offered for FasterWhisper.

    Serves the ``GET /fasterwhisper/devices`` endpoint. The list is
    hard-coded; a fresh list is created on every call, so callers may
    mutate the result without affecting later calls.

    Returns:
        The selectable device names, in display order.
    """
    return ["auto", "cpu", "cuda"]
880942

881943
# POST /xvasynth/start
882944
def start_xvasynth(self):
@@ -1095,6 +1157,5 @@ async def get_elevenlabs_subscription_data(self):
10951157
self.printr.toast_error(f"Elevenlabs: \n{str(e)}")
10961158

10971159
async def shutdown(self):
    """Stop background services and unload the tower on shutdown.

    Awaits ``self.stop_xvasynth()`` first and ``self.unload_tower()``
    second. The whispercpp server is no longer stopped here: the
    ``stop_whispercpp`` method was removed, and it was a plain function
    without a return value, so awaiting it would have raised ``TypeError``.
    """
    await self.stop_xvasynth()
    await self.unload_tower()

‎wingmen/open_ai_wingman.py

+100-27
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import asyncio
44
import random
55
import traceback
6-
from idlelib.pyparse import trans
76
from typing import Mapping, Optional
87
from openai.types.chat import ChatCompletion
98
from api.interface import (
@@ -19,7 +18,8 @@
1918
SttProvider,
2019
ConversationProvider,
2120
WingmanProSttProvider,
22-
WingmanProTtsProvider, WingmanInitializationErrorType,
21+
WingmanProTtsProvider,
22+
WingmanInitializationErrorType,
2323
)
2424
from providers.edge import Edge
2525
from providers.elevenlabs import ElevenLabs
@@ -84,6 +84,11 @@ async def validate(self):
8484
errors = await super().validate()
8585

8686
try:
87+
if self.uses_provider("whispercpp"):
88+
self.whispercpp.validate(self.name, errors)
89+
if self.uses_provider("fasterwhisper"):
90+
self.fasterwhisper.validate(errors)
91+
8792
if self.uses_provider("openai"):
8893
await self.validate_and_set_openai(errors)
8994

@@ -116,6 +121,7 @@ async def validate(self):
116121

117122
if self.uses_provider("perplexity"):
118123
await self.validate_and_set_perplexity(errors)
124+
119125
except Exception as e:
120126
errors.append(
121127
WingmanInitializationError(
@@ -124,7 +130,11 @@ async def validate(self):
124130
error_type=WingmanInitializationErrorType.UNKNOWN,
125131
)
126132
)
127-
printr.print(f"Error during provider validation: {str(e)}", color=LogType.ERROR, server_only=True)
133+
printr.print(
134+
f"Error during provider validation: {str(e)}",
135+
color=LogType.ERROR,
136+
server_only=True,
137+
)
128138
printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
129139

130140
return errors
@@ -199,6 +209,8 @@ def uses_provider(self, provider_type: str):
199209
return self.config.features.tts_provider == TtsProvider.XVASYNTH
200210
elif provider_type == "whispercpp":
201211
return self.config.features.stt_provider == SttProvider.WHISPERCPP
212+
elif provider_type == "fasterwhisper":
213+
return self.config.features.stt_provider == SttProvider.FASTER_WHISPER
202214
elif provider_type == "wingman_pro":
203215
return any(
204216
[
@@ -227,7 +239,10 @@ async def prepare(self):
227239
)
228240
self.threaded_execution(self._generate_instant_responses)
229241
except Exception as e:
230-
await printr.print_async(f"Error while preparing wingman '{self.name}': {str(e)}", color=LogType.ERROR)
242+
await printr.print_async(
243+
f"Error while preparing wingman '{self.name}': {str(e)}",
244+
color=LogType.ERROR,
245+
)
231246
printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
232247

233248
async def unload_skills(self):
@@ -242,7 +257,10 @@ async def prepare_skill(self, skill: Skill):
242257
self.tool_skills[tool_name] = skill
243258
self.skill_tools.append(tool)
244259
except Exception as e:
245-
await printr.print_async(f"Error while preparing skill '{skill.name}': {str(e)}", color=LogType.ERROR)
260+
await printr.print_async(
261+
f"Error while preparing skill '{skill.name}': {str(e)}",
262+
color=LogType.ERROR,
263+
)
246264
printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
247265

248266
# init skill methods
@@ -335,7 +353,9 @@ async def validate_and_set_wingman_pro(self):
335353
wingman_name=self.name, settings=self.settings.wingman_pro
336354
)
337355

338-
async def validate_and_set_perplexity(self, errors: list[WingmanInitializationError]):
356+
async def validate_and_set_perplexity(
357+
self, errors: list[WingmanInitializationError]
358+
):
339359
api_key = await self.retrieve_secret("perplexity", errors)
340360
if api_key:
341361
self.perplexity = OpenAi(
@@ -361,7 +381,10 @@ async def update_settings(self, settings: SettingsConfig):
361381
server_only=True,
362382
)
363383
except Exception as e:
364-
await printr.print_async(f"Error while updating settings for wingman '{self.name}': {str(e)}", color=LogType.ERROR)
384+
await printr.print_async(
385+
f"Error while updating settings for wingman '{self.name}': {str(e)}",
386+
color=LogType.ERROR,
387+
)
365388
printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
366389

367390
async def _generate_instant_responses(self) -> None:
@@ -423,7 +446,10 @@ async def _generate_instant_responses(self) -> None:
423446
completion = await self.actual_llm_call(messages)
424447
retry_count += 1
425448
except Exception as e:
426-
await printr.print_async(f"Error while generating instant responses: {str(e)}", color=LogType.ERROR)
449+
await printr.print_async(
450+
f"Error while generating instant responses: {str(e)}",
451+
color=LogType.ERROR,
452+
)
427453
printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
428454

429455
async def _transcribe(self, audio_input_wav: str) -> str | None:
@@ -454,8 +480,17 @@ async def _transcribe(self, audio_input_wav: str) -> str | None:
454480
transcript = self.whispercpp.transcribe(
455481
filename=audio_input_wav, config=self.config.whispercpp
456482
)
483+
elif self.config.features.stt_provider == SttProvider.FASTER_WHISPER:
484+
transcript = self.fasterwhisper.transcribe(
485+
filename=audio_input_wav,
486+
config=self.config.fasterwhisper,
487+
wingman_name=self.name,
488+
)
457489
elif self.config.features.stt_provider == SttProvider.WINGMAN_PRO:
458-
if self.config.wingman_pro.stt_provider == WingmanProSttProvider.WHISPER:
490+
if (
491+
self.config.wingman_pro.stt_provider
492+
== WingmanProSttProvider.WHISPER
493+
):
459494
transcript = self.wingman_pro.transcribe_whisper(
460495
filename=audio_input_wav
461496
)
@@ -469,7 +504,10 @@ async def _transcribe(self, audio_input_wav: str) -> str | None:
469504
elif self.config.features.stt_provider == SttProvider.OPENAI:
470505
transcript = self.openai.transcribe(filename=audio_input_wav)
471506
except Exception as e:
472-
await printr.print_async(f"Error during transcription using '{self.config.features.stt_provider}': {str(e)}", color=LogType.ERROR)
507+
await printr.print_async(
508+
f"Error during transcription using '{self.config.features.stt_provider}': {str(e)}",
509+
color=LogType.ERROR,
510+
)
473511
printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
474512

475513
if not transcript:
@@ -896,8 +934,12 @@ async def generate_image(self, text: str) -> str:
896934
try:
897935
return await self.wingman_pro.generate_image(text)
898936
except Exception as e:
899-
await printr.print_async(f"Error during image generation: {str(e)}", color=LogType.ERROR)
900-
printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
937+
await printr.print_async(
938+
f"Error during image generation: {str(e)}", color=LogType.ERROR
939+
)
940+
printr.print(
941+
traceback.format_exc(), color=LogType.ERROR, server_only=True
942+
)
901943

902944
return ""
903945

@@ -914,33 +956,45 @@ async def actual_llm_call(self, messages, tools: list[dict] = None):
914956
config=self.config.azure.conversation,
915957
tools=tools,
916958
)
917-
elif self.config.features.conversation_provider == ConversationProvider.OPENAI:
959+
elif (
960+
self.config.features.conversation_provider
961+
== ConversationProvider.OPENAI
962+
):
918963
completion = self.openai.ask(
919964
messages=messages,
920965
tools=tools,
921966
model=self.config.openai.conversation_model,
922967
)
923-
elif self.config.features.conversation_provider == ConversationProvider.MISTRAL:
968+
elif (
969+
self.config.features.conversation_provider
970+
== ConversationProvider.MISTRAL
971+
):
924972
completion = self.mistral.ask(
925973
messages=messages,
926974
tools=tools,
927975
model=self.config.mistral.conversation_model.value,
928976
)
929-
elif self.config.features.conversation_provider == ConversationProvider.GROQ:
977+
elif (
978+
self.config.features.conversation_provider == ConversationProvider.GROQ
979+
):
930980
completion = self.groq.ask(
931981
messages=messages,
932982
tools=tools,
933983
model=self.config.groq.conversation_model,
934984
)
935985
elif (
936-
self.config.features.conversation_provider == ConversationProvider.CEREBRAS
986+
self.config.features.conversation_provider
987+
== ConversationProvider.CEREBRAS
937988
):
938989
completion = self.cerebras.ask(
939990
messages=messages,
940991
tools=tools,
941992
model=self.config.cerebras.conversation_model,
942993
)
943-
elif self.config.features.conversation_provider == ConversationProvider.GOOGLE:
994+
elif (
995+
self.config.features.conversation_provider
996+
== ConversationProvider.GOOGLE
997+
):
944998
completion = self.google.ask(
945999
messages=messages,
9461000
tools=tools,
@@ -956,7 +1010,8 @@ async def actual_llm_call(self, messages, tools: list[dict] = None):
9561010
model=self.config.openrouter.conversation_model,
9571011
)
9581012
elif (
959-
self.config.features.conversation_provider == ConversationProvider.LOCAL_LLM
1013+
self.config.features.conversation_provider
1014+
== ConversationProvider.LOCAL_LLM
9601015
):
9611016
completion = self.local_llm.ask(
9621017
messages=messages,
@@ -973,15 +1028,18 @@ async def actual_llm_call(self, messages, tools: list[dict] = None):
9731028
tools=tools,
9741029
)
9751030
elif (
976-
self.config.features.conversation_provider == ConversationProvider.PERPLEXITY
1031+
self.config.features.conversation_provider
1032+
== ConversationProvider.PERPLEXITY
9771033
):
9781034
completion = self.perplexity.ask(
9791035
messages=messages,
9801036
tools=tools,
9811037
model=self.config.perplexity.conversation_model.value,
9821038
)
9831039
except Exception as e:
984-
await printr.print_async(f"Error during LLM call: {str(e)}", color=LogType.ERROR)
1040+
await printr.print_async(
1041+
f"Error during LLM call: {str(e)}", color=LogType.ERROR
1042+
)
9851043
printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
9861044
return None
9871045

@@ -1087,8 +1145,12 @@ async def _handle_tool_calls(self, tool_calls):
10871145
self._add_tool_response(tool_call, function_response)
10881146
except Exception as e:
10891147
self._add_tool_response(tool_call, "Error")
1090-
await printr.print_async(f"Error while processing tool call: {str(e)}", color=LogType.ERROR)
1091-
printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
1148+
await printr.print_async(
1149+
f"Error while processing tool call: {str(e)}", color=LogType.ERROR
1150+
)
1151+
printr.print(
1152+
traceback.format_exc(), color=LogType.ERROR, server_only=True
1153+
)
10921154

10931155
return instant_response, skill
10941156

@@ -1136,9 +1198,16 @@ async def execute_command_by_function_call(
11361198
if instant_response:
11371199
await self.play_to_user(instant_response)
11381200
except Exception as e:
1139-
await printr.print_async(f"Error while processing skill '{skill.name}': {str(e)}", color=LogType.ERROR)
1140-
printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
1141-
function_response = "ERROR DURING PROCESSING" # hints to AI that there was an Error
1201+
await printr.print_async(
1202+
f"Error while processing skill '{skill.name}': {str(e)}",
1203+
color=LogType.ERROR,
1204+
)
1205+
printr.print(
1206+
traceback.format_exc(), color=LogType.ERROR, server_only=True
1207+
)
1208+
function_response = (
1209+
"ERROR DURING PROCESSING" # hints to AI that there was an Error
1210+
)
11421211
instant_response = None
11431212

11441213
return function_response, instant_response, used_skill
@@ -1230,7 +1299,9 @@ async def play_to_user(
12301299
audio_player=self.audio_player,
12311300
wingman_name=self.name,
12321301
)
1233-
elif self.config.wingman_pro.tts_provider == WingmanProTtsProvider.AZURE:
1302+
elif (
1303+
self.config.wingman_pro.tts_provider == WingmanProTtsProvider.AZURE
1304+
):
12341305
await self.wingman_pro.generate_azure_speech(
12351306
text=text,
12361307
config=self.config.azure.tts,
@@ -1243,7 +1314,9 @@ async def play_to_user(
12431314
f"Unsupported TTS provider: {self.config.features.tts_provider}"
12441315
)
12451316
except Exception as e:
1246-
await printr.print_async(f"Error during TTS playback: {str(e)}", color=LogType.ERROR)
1317+
await printr.print_async(
1318+
f"Error during TTS playback: {str(e)}", color=LogType.ERROR
1319+
)
12471320
printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
12481321

12491322
async def _execute_command(self, command: dict) -> str:

‎wingmen/wingman.py

+66-18
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
LogType,
2424
WingmanInitializationErrorType,
2525
)
26+
from providers.faster_whisper import FasterWhisper
2627
from providers.whispercpp import Whispercpp
2728
from providers.xvasynth import XVASynth
2829
from services.audio_player import AudioPlayer
@@ -52,6 +53,7 @@ def __init__(
5253
audio_player: AudioPlayer,
5354
audio_library: AudioLibrary,
5455
whispercpp: Whispercpp,
56+
fasterwhisper: FasterWhisper,
5557
xvasynth: XVASynth,
5658
tower: "Tower",
5759
):
@@ -87,6 +89,9 @@ def __init__(
8789
self.whispercpp = whispercpp
8890
"""A class that handles the communication with the Whispercpp server for transcription."""
8991

92+
self.fasterwhisper = fasterwhisper
93+
"""A class that handles local transcriptions using FasterWhisper."""
94+
9095
self.xvasynth = xvasynth
9196
"""A class that handles the communication with the XVASynth server for TTS."""
9297

@@ -102,7 +107,7 @@ def get_record_key(self) -> str | int:
102107
def get_record_mouse_button(self) -> str:
103108
"""Returns the activation or "push-to-talk" mouse button for this Wingman."""
104109
return self.config.record_mouse_button
105-
110+
106111
def get_record_joystick_button(self) -> str:
107112
"""Returns the activation or "push-to-talk" joystick button for this Wingman."""
108113
if not self.config.record_joystick_button:
@@ -160,7 +165,11 @@ async def retrieve_secret(self, secret_name, errors):
160165
)
161166
)
162167
except Exception as e:
163-
printr.print(f"Error retrieving secret ''{secret_name}: {e}", color=LogType.ERROR, server_only=True)
168+
printr.print(
169+
f"Error retrieving secret ''{secret_name}: {e}",
170+
color=LogType.ERROR,
171+
server_only=True,
172+
)
164173
printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
165174
errors.append(
166175
WingmanInitializationError(
@@ -190,8 +199,13 @@ async def unload_skills(self):
190199
try:
191200
await skill.unload()
192201
except Exception as e:
193-
await printr.print_async(f"Error unloading skill '{skill.name}': {str(e)}", color=LogType.ERROR)
194-
printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
202+
await printr.print_async(
203+
f"Error unloading skill '{skill.name}': {str(e)}",
204+
color=LogType.ERROR,
205+
)
206+
printr.print(
207+
traceback.format_exc(), color=LogType.ERROR, server_only=True
208+
)
195209

196210
async def init_skills(self) -> list[WingmanInitializationError]:
197211
"""This method is called when the Wingman is instantiated by Tower or when a skill's config changes.
@@ -246,8 +260,13 @@ async def init_skills(self) -> list[WingmanInitializationError]:
246260
color=LogType.ERROR,
247261
)
248262
except Exception as e:
249-
await printr.print_async(f"Error loading skill '{skill_config.name}': {str(e)}", color=LogType.ERROR)
250-
printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
263+
await printr.print_async(
264+
f"Error loading skill '{skill_config.name}': {str(e)}",
265+
color=LogType.ERROR,
266+
)
267+
printr.print(
268+
traceback.format_exc(), color=LogType.ERROR, server_only=True
269+
)
251270

252271
return errors
253272

@@ -287,7 +306,9 @@ async def process(self, audio_input_wav: str = None, transcript: str = None):
287306
process_result = None
288307

289308
if self.settings.debug_mode and not transcript:
290-
await printr.print_async("Starting transcription...", color=LogType.INFO)
309+
await printr.print_async(
310+
"Starting transcription...", color=LogType.INFO
311+
)
291312

292313
if not transcript:
293314
# transcribe the audio.
@@ -332,7 +353,10 @@ async def process(self, audio_input_wav: str = None, transcript: str = None):
332353
if process_result:
333354
await self.play_to_user(str(process_result), not interrupt)
334355
except Exception as e:
335-
await printr.print_async(f"Error during processing of wingmann ''{self.name}: {str(e)}", color=LogType.ERROR)
356+
await printr.print_async(
357+
f"Error during processing of wingmann ''{self.name}: {str(e)}",
358+
color=LogType.ERROR,
359+
)
336360
printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
337361

338362
# ───────────────── virtual methods / hooks ───────────────── #
@@ -432,13 +456,18 @@ async def _execute_instant_activation_command(
432456
if command.instant_activation:
433457
for phrase in command.instant_activation:
434458
if phrase.lower() in commands_by_instant_activation:
435-
commands_by_instant_activation[phrase.lower()].append(command)
459+
commands_by_instant_activation[phrase.lower()].append(
460+
command
461+
)
436462
else:
437463
commands_by_instant_activation[phrase.lower()] = [command]
438464

439465
# find best matching phrase
440466
phrase = difflib.get_close_matches(
441-
transcript.lower(), commands_by_instant_activation.keys(), n=1, cutoff=0.8
467+
transcript.lower(),
468+
commands_by_instant_activation.keys(),
469+
n=1,
470+
cutoff=0.8,
442471
)
443472

444473
# if no phrase found, return None
@@ -453,7 +482,10 @@ async def _execute_instant_activation_command(
453482
# return the executed command
454483
return commands
455484
except Exception as e:
456-
await printr.print_async(f"Error during instant activation in wingmann '{self.name}': {str(e)}", color=LogType.ERROR)
485+
await printr.print_async(
486+
f"Error during instant activation in wingmann '{self.name}': {str(e)}",
487+
color=LogType.ERROR,
488+
)
457489
printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
458490
return None
459491

@@ -482,7 +514,8 @@ async def _execute_command(self, command: CommandConfig) -> str:
482514

483515
if len(command.actions or []) == 0:
484516
await printr.print_async(
485-
f"No actions found for command: {command.name}", color=LogType.WARNING
517+
f"No actions found for command: {command.name}",
518+
color=LogType.WARNING,
486519
)
487520

488521
# handle the global special commands:
@@ -491,9 +524,12 @@ async def _execute_command(self, command: CommandConfig) -> str:
491524

492525
return self._select_command_response(command) or "Ok"
493526
except Exception as e:
494-
await printr.print_async(f"Error executing command '{command.name}' for wingman '{self.name}': {str(e)}", color=LogType.ERROR)
527+
await printr.print_async(
528+
f"Error executing command '{command.name}' for wingman '{self.name}': {str(e)}",
529+
color=LogType.ERROR,
530+
)
495531
printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
496-
return "ERROR DURING PROCESSING" # hints to AI that there was an Error
532+
return "ERROR DURING PROCESSING" # hints to AI that there was an Error
497533

498534
async def execute_action(self, command: CommandConfig):
499535
"""Executes the actions defined in the command (in order).
@@ -580,12 +616,16 @@ async def execute_action(self, command: CommandConfig):
580616
action.audio, self.config.sound.volume
581617
)
582618
except Exception as e:
583-
await printr.print_async(f"Error executing actions of command '{command.name}' for wingman '{self.name}': {str(e)}", color=LogType.ERROR)
619+
await printr.print_async(
620+
f"Error executing actions of command '{command.name}' for wingman '{self.name}': {str(e)}",
621+
color=LogType.ERROR,
622+
)
584623
printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
585624

586625
def threaded_execution(self, function, *args) -> threading.Thread | None:
587626
"""Execute a function in a separate thread."""
588627
try:
628+
589629
def start_thread(function, *args):
590630
if asyncio.iscoroutinefunction(function):
591631
new_loop = asyncio.new_event_loop()
@@ -599,7 +639,9 @@ def start_thread(function, *args):
599639
thread.start()
600640
return thread
601641
except Exception as e:
602-
printr.print(f"Error starting threaded execution: {str(e)}", color=LogType.ERROR)
642+
printr.print(
643+
f"Error starting threaded execution: {str(e)}", color=LogType.ERROR
644+
)
603645
printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
604646
return None
605647

@@ -620,13 +662,19 @@ async def update_config(
620662
errors = await self.validate()
621663

622664
for error in errors:
623-
if error.error_type != WingmanInitializationErrorType.MISSING_SECRET:
665+
if (
666+
error.error_type
667+
!= WingmanInitializationErrorType.MISSING_SECRET
668+
):
624669
self.config = old_config
625670
return False
626671

627672
return True
628673
except Exception as e:
629-
await printr.print_async(f"Error updating config for wingman '{self.name}': {str(e)}", color=LogType.ERROR)
674+
await printr.print_async(
675+
f"Error updating config for wingman '{self.name}': {str(e)}",
676+
color=LogType.ERROR,
677+
)
630678
printr.print(traceback.format_exc(), color=LogType.ERROR, server_only=True)
631679
return False
632680

0 commit comments

Comments
 (0)
Please sign in to comment.