Skip to content

Commit 71e3769

Browse files
authored
fix audio channel problems and add mix layer files (#187)
* sound effects: add layer files to mix in noise
* resample audio samples to 44 kHz mono
* works for non-streaming
* make it work for streaming, too; change gain_boost to dB
* add static beep effect for high-quality radio effect
* fix Edge but break beeps
* fix beeps for Edge/stereo
* remove prints
1 parent f5dd012 commit 71e3769

10 files changed

+254
-32
lines changed

audio_samples/Brown_Noise.wav

-6.91 MB
Binary file not shown.

audio_samples/Pink_Noise.wav

-6.91 MB
Binary file not shown.

audio_samples/Radio_Noise.wav

-6.86 MB
Binary file not shown.

audio_samples/Radio_Static.wav

-1020 KB
Binary file not shown.

audio_samples/Radio_Static_Beep.wav

-38.5 KB
Binary file not shown.

audio_samples/White_Noise.wav

-6.91 MB
Binary file not shown.

audio_samples/low_quality_radio.wav

-518 KB
Binary file not shown.

providers/elevenlabs.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
GenerationOptions,
44
PlaybackOptions,
55
)
6-
from api.enums import ElevenlabsModel, WingmanInitializationErrorType
6+
from api.enums import ElevenlabsModel, SoundEffect, WingmanInitializationErrorType
77
from api.interface import ElevenlabsConfig, SoundConfig, WingmanInitializationError
88
from services.audio_player import AudioPlayer
99
from services.secret_keeper import SecretKeeper
@@ -53,6 +53,10 @@ async def play_audio(
5353
def notify_playback_finished():
5454
audio_player.playback_events.unsubscribe("finished", playback_finished)
5555

56+
contains_high_end_radio = SoundEffect.HIGH_END_RADIO in sound_config.effects
57+
if contains_high_end_radio:
58+
audio_player.play_wav("Radio_Static_Beep.wav")
59+
5660
if sound_config.play_beep:
5761
audio_player.play_wav("beep.wav")
5862
elif sound_config.play_beep_apollo:
@@ -68,6 +72,10 @@ def notify_playback_started():
6872
elif sound_config.play_beep_apollo:
6973
audio_player.play_wav("Apollo_Beep.wav")
7074

75+
contains_high_end_radio = SoundEffect.HIGH_END_RADIO in sound_config.effects
76+
if contains_high_end_radio:
77+
audio_player.play_wav("Radio_Static_Beep.wav")
78+
7179
WebSocketUser.ensure_async(
7280
audio_player.notify_playback_started(wingman_name)
7381
)

services/audio_player.py

+201-18
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,14 @@
77
import soundfile as sf
88
import sounddevice as sd
99
from scipy.signal import resample
10+
from api.enums import SoundEffect
1011
from api.interface import SoundConfig
1112
from services.pub_sub import PubSub
12-
from services.sound_effects import get_sound_effects
13+
from services.sound_effects import (
14+
get_additional_layer_file,
15+
get_azure_workaround_gain_boost,
16+
get_sound_effects,
17+
)
1318

1419

1520
class AudioPlayer:
@@ -29,6 +34,9 @@ def __init__(
2934
self.stream_event = PubSub()
3035
self.on_playback_started = on_playback_started
3136
self.on_playback_finished = on_playback_finished
37+
self.sample_dir = path.join(
38+
path.abspath(path.dirname(__file__)), "../audio_samples"
39+
)
3240

3341
def set_event_loop(self, loop: asyncio.AbstractEventLoop):
3442
self.event_loop = loop
@@ -37,14 +45,51 @@ def start_playback(self, audio, sample_rate, channels, finished_callback):
3745
def callback(outdata, frames, time, status):
3846
nonlocal playhead
3947
chunksize = frames * channels
40-
current_chunk = audio[playhead : playhead + chunksize].reshape(-1, channels)
41-
if current_chunk.shape[0] < frames:
42-
outdata[: current_chunk.shape[0]] = current_chunk
43-
outdata[current_chunk.shape[0] :] = 0 # Fill the rest with zeros
44-
raise sd.CallbackStop # Stop the stream after playing the current chunk
48+
49+
if playhead * channels >= len(audio):
50+
if np.issubdtype(outdata.dtype, np.floating):
51+
outdata.fill(0.0) # Fill with zero for floats
52+
else:
53+
outdata[:] = bytes(
54+
len(outdata)
55+
) # Fill with zeros for buffer of int types
56+
raise sd.CallbackStop
57+
58+
end = min(playhead + chunksize, len(audio) // channels)
59+
current_chunk = audio[playhead:end]
60+
61+
if channels > 1 and current_chunk.ndim == 1:
62+
current_chunk = np.tile(current_chunk[:, None], (1, channels)).flatten()
63+
64+
# It's critical that current_chunk matches the number of elements in outdata
65+
required_length = frames * channels
66+
current_chunk = current_chunk[:required_length]
67+
68+
if len(current_chunk) < required_length:
69+
current_chunk = np.pad(
70+
current_chunk, (0, required_length - len(current_chunk)), "constant"
71+
)
72+
73+
if outdata.dtype == np.float32 or outdata.dtype == np.float64:
74+
outdata[:required_length] = current_chunk.astype(outdata.dtype).reshape(
75+
outdata.shape
76+
)
4577
else:
46-
outdata[:] = current_chunk
47-
playhead += chunksize # Advance the playhead
78+
current_chunk_bytes = current_chunk.astype(outdata.dtype).tobytes()
79+
outdata[: len(current_chunk_bytes)] = current_chunk_bytes[
80+
: len(outdata)
81+
]
82+
83+
playhead += chunksize
84+
85+
if end >= len(audio):
86+
if np.issubdtype(outdata.dtype, np.floating):
87+
outdata.fill(0.0) # Fill with zero for floats
88+
else:
89+
outdata[:] = bytes(
90+
len(outdata)
91+
) # Fill with zeros for buffer of int types
92+
raise sd.CallbackStop
4893

4994
playhead = 0 # Tracks the position in the audio
5095

@@ -74,6 +119,7 @@ async def play_with_effects(
74119
input_data: bytes | tuple,
75120
config: SoundConfig,
76121
wingman_name: str = None,
122+
mixed_layer_gain_boost_db: float = -9.0,
77123
):
78124
if isinstance(input_data, bytes):
79125
audio, sample_rate = self._get_audio_from_stream(input_data)
@@ -90,6 +136,20 @@ async def play_with_effects(
90136
for sound_effect in sound_effects:
91137
audio = sound_effect(audio, sample_rate)
92138

139+
mixed_layer_file = None
140+
for effect in config.effects:
141+
if not mixed_layer_file:
142+
mixed_layer_file = get_additional_layer_file(effect)
143+
144+
if mixed_layer_file:
145+
audio = self._mix_in_layer(
146+
audio, sample_rate, mixed_layer_file, mixed_layer_gain_boost_db
147+
)
148+
149+
contains_high_end_radio = SoundEffect.HIGH_END_RADIO in config.effects
150+
if contains_high_end_radio:
151+
audio = self._add_wav_effect(audio, sample_rate, "Radio_Static_Beep.wav")
152+
93153
if config.play_beep:
94154
audio = self._add_wav_effect(audio, sample_rate, "beep.wav")
95155
elif config.play_beep_apollo:
@@ -130,9 +190,8 @@ async def notify_playback_finished(self, wingman_name: str):
130190
await self.on_playback_finished(wingman_name)
131191

132192
def play_wav(self, audio_sample_file: str):
133-
bundle_dir = path.abspath(path.dirname(__file__))
134193
beep_audio, beep_sample_rate = self.get_audio_from_file(
135-
path.join(bundle_dir, f"../audio_samples/{audio_sample_file}")
194+
path.join(self.sample_dir, audio_sample_file)
136195
)
137196
self.start_playback(beep_audio, beep_sample_rate, 1, None)
138197

@@ -147,15 +206,21 @@ def _get_audio_from_stream(self, stream: bytes) -> tuple:
147206
def _add_wav_effect(
148207
self, audio: np.ndarray, sample_rate: int, audio_sample_file: str
149208
) -> np.ndarray:
150-
bundle_dir = path.abspath(path.dirname(__file__))
151209
beep_audio, beep_sample_rate = self.get_audio_from_file(
152-
path.join(bundle_dir, f"../audio_samples/{audio_sample_file}")
210+
path.join(self.sample_dir, audio_sample_file)
153211
)
154212

155213
# Resample the beep sound if necessary to match the sample rate of 'audio'
156214
if beep_sample_rate != sample_rate:
157215
beep_audio = self._resample_audio(beep_audio, beep_sample_rate, sample_rate)
158216

217+
# Ensure beep_audio has the same number of channels as 'audio'
218+
if beep_audio.ndim == 1 and audio.ndim == 2:
219+
beep_audio = np.tile(beep_audio[:, np.newaxis], (1, audio.shape[1]))
220+
221+
if beep_audio.ndim == 2 and audio.ndim == 1:
222+
audio = audio[:, np.newaxis]
223+
159224
# Concatenate the beep sound to the start and end of the audio
160225
audio_with_beeps = np.concatenate((beep_audio, audio, beep_audio), axis=0)
161226

@@ -174,11 +239,52 @@ def _resample_audio(
174239

175240
return resampled_audio
176241

242+
def _mix_in_layer(
    self,
    audio: np.ndarray,
    sample_rate: int,
    mix_layer_file: str,
    mix_layer_gain_boost_db: float = 0.0,
) -> np.ndarray:
    """Mix a looping background layer (e.g. noise) underneath ``audio``.

    The layer is loaded from ``self.sample_dir``, resampled to
    ``sample_rate`` when its own rate differs, matched to the channel count
    of ``audio``, looped or truncated to the length of ``audio``, scaled by
    the requested gain and finally added to ``audio``.

    Args:
        audio: Samples to mix into, either 1-D ``(frames,)`` or 2-D
            ``(frames, channels)``.
        sample_rate: Sample rate of ``audio``.
        mix_layer_file: File name of the layer inside ``self.sample_dir``.
        mix_layer_gain_boost_db: Gain applied to the layer in dB
            (``0.0`` keeps the layer at its recorded level).

    Returns:
        The mixed signal as a 2-D ``(frames, channels)`` array. A 1-D
        ``audio`` input therefore comes back with shape ``(frames, 1)``.
        If the layer file contains no samples, ``audio`` is returned
        unmixed (still as a 2-D array).
    """
    # get_audio_from_file yields (samples, sample_rate); the samples are
    # presumably a NumPy array as returned by soundfile -- verify against
    # the helper if that ever changes.
    noise_audio, noise_sample_rate = self.get_audio_from_file(
        path.join(self.sample_dir, mix_layer_file)
    )

    if noise_sample_rate != sample_rate:
        noise_audio = self._resample_audio(
            noise_audio, noise_sample_rate, sample_rate
        )

    # Work on (frames, channels) arrays so the addition below is explicit
    # about which axis is time and which is channel.
    if noise_audio.ndim == 1:
        noise_audio = noise_audio[:, None]
    if audio.ndim == 1:
        audio = audio[:, None]

    # An empty layer (or empty audio) leaves nothing to mix. Bail out early
    # instead of dividing by zero when computing the loop count below.
    if len(noise_audio) == 0 or len(audio) == 0:
        return audio

    channels = audio.shape[1]
    if noise_audio.shape[1] != channels:
        # Tiling a multi-channel layer would multiply its channel count and
        # no longer line up with ``audio``; downmix to mono first.
        if noise_audio.shape[1] > 1:
            noise_audio = noise_audio.mean(axis=1, keepdims=True)
        noise_audio = np.tile(noise_audio, (1, channels))

    # Loop the layer until it covers the whole audio, then cut it to size.
    if len(noise_audio) < len(audio):
        repeat_count = -(-len(audio) // len(noise_audio))  # ceiling division
        noise_audio = np.tile(noise_audio, (repeat_count, 1))
    noise_audio = noise_audio[: len(audio)]

    # Convert the gain from dB to a linear amplitude factor.
    amplitude_factor = 10 ** (mix_layer_gain_boost_db / 20)

    return audio + amplitude_factor * noise_audio
281+
177282
async def stream_with_effects(
178283
self,
179284
buffer_callback,
180285
config: SoundConfig,
181286
wingman_name: str,
287+
mix_layer_gain_boost_db: float = 0.0,
182288
buffer_size=2048,
183289
sample_rate=16000,
184290
channels=1,
@@ -188,14 +294,79 @@ async def stream_with_effects(
188294
buffer = bytearray()
189295
stream_finished = False
190296
data_received = False
297+
mixed_pos = 0
298+
299+
mix_layer_file = None
300+
for effect in config.effects:
301+
if not mix_layer_file:
302+
mix_layer_file = get_additional_layer_file(effect)
303+
# if we boost the actual audio, we need to boost the mixed layer as well
304+
if use_gain_boost:
305+
mix_layer_gain_boost_db += get_azure_workaround_gain_boost(effect)
306+
307+
if mix_layer_file:
308+
noise_audio, noise_sample_rate = self.get_audio_from_file(
309+
path.join(self.sample_dir, mix_layer_file)
310+
)
311+
if noise_sample_rate != sample_rate:
312+
noise_audio = self._resample_audio(
313+
noise_audio, noise_sample_rate, sample_rate
314+
)
315+
if channels > 1 and noise_audio.ndim == 1:
316+
noise_audio = np.tile(noise_audio[:, None], (1, channels))
317+
noise_audio = noise_audio.flatten()
318+
319+
def get_mixed_chunk(length):
320+
nonlocal mixed_pos, noise_audio
321+
chunk = np.zeros(length, dtype=np.float32)
322+
remaining = length
323+
while remaining > 0:
324+
if mixed_pos >= len(noise_audio):
325+
mixed_pos = 0
326+
end_pos = min(len(noise_audio), mixed_pos + remaining)
327+
chunk[
328+
length - remaining : length - remaining + (end_pos - mixed_pos)
329+
] = noise_audio[mixed_pos:end_pos]
330+
remaining -= end_pos - mixed_pos
331+
mixed_pos = end_pos
332+
return chunk
191333

192334
def callback(outdata, frames, time, status):
193-
nonlocal buffer, stream_finished, data_received
194-
335+
nonlocal buffer, stream_finished, data_received, mixed_pos
195336
if data_received and len(buffer) == 0:
196337
stream_finished = True
197-
outdata[: len(buffer)] = buffer[: len(outdata)]
198-
buffer = buffer[len(outdata) :]
338+
outdata[:] = bytes(len(outdata)) # Fill the buffer with zeros
339+
return
340+
341+
if len(buffer) > 0:
342+
num_elements = frames * channels
343+
byte_size = np.dtype(dtype).itemsize
344+
data_chunk = np.frombuffer(
345+
buffer[: num_elements * byte_size], dtype=dtype
346+
).astype(np.float32)
347+
348+
if len(data_chunk) < num_elements:
349+
data_chunk = np.pad(
350+
data_chunk, (0, num_elements - len(data_chunk)), "constant"
351+
)
352+
353+
if channels > 1 and data_chunk.ndim == 1:
354+
data_chunk = np.tile(data_chunk[:, None], (1, channels)).flatten()
355+
356+
data_chunk = data_chunk[: frames * channels]
357+
358+
if mix_layer_file:
359+
mix_chunk = get_mixed_chunk(len(data_chunk))
360+
# Convert gain boost from dB to amplitude factor
361+
amplitude_factor = 10 ** (mix_layer_gain_boost_db / 20)
362+
data_chunk = (
363+
data_chunk + mix_chunk[: len(data_chunk)] * amplitude_factor
364+
)
365+
366+
data_chunk = data_chunk.flatten()
367+
data_chunk_bytes = data_chunk.astype(dtype).tobytes()
368+
outdata[: len(data_chunk_bytes)] = data_chunk_bytes[: len(outdata)]
369+
buffer = buffer[num_elements * byte_size :]
199370

200371
with sd.RawOutputStream(
201372
samplerate=sample_rate,
@@ -215,6 +386,10 @@ def callback(outdata, frames, time, status):
215386
elif config.play_beep_apollo:
216387
self.play_wav("Apollo_Beep.wav")
217388

389+
contains_high_end_radio = SoundEffect.HIGH_END_RADIO in config.effects
390+
if contains_high_end_radio:
391+
self.play_wav("Radio_Static_Beep.wav")
392+
218393
self.raw_stream.start()
219394

220395
sound_effects = get_sound_effects(
@@ -232,17 +407,25 @@ def callback(outdata, frames, time, status):
232407
data_in_numpy, sample_rate, reset=False
233408
)
234409

410+
if mix_layer_file:
411+
noise_chunk = get_mixed_chunk(len(data_in_numpy))
412+
# Convert gain boost from dB to amplitude factor
413+
amplitude_factor = 10 ** (mix_layer_gain_boost_db / 20)
414+
data_in_numpy = data_in_numpy + noise_chunk * amplitude_factor
415+
235416
processed_buffer = data_in_numpy.astype(dtype).tobytes()
236417
buffer.extend(processed_buffer)
237-
238418
await self.stream_event.publish("audio", processed_buffer)
239-
240419
filled_size = buffer_callback(audio_buffer)
241420

242421
data_received = True
243422
while not stream_finished:
244423
sd.sleep(100)
245424

425+
contains_high_end_radio = SoundEffect.HIGH_END_RADIO in config.effects
426+
if contains_high_end_radio:
427+
self.play_wav("Radio_Static_Beep.wav")
428+
246429
if config.play_beep:
247430
self.play_wav("beep.wav")
248431
elif config.play_beep_apollo:

0 commit comments

Comments
 (0)