Skip to content

Commit a2a2916

Browse files
authored
Merge pull request #2443 from makermelissa/main
Added ChatGPT Voice Assistant
2 parents 0180bb4 + 62563b0 commit a2a2916

File tree

1 file changed

+234
-0
lines changed

1 file changed

+234
-0
lines changed

ChatGPT_Voice_Assistant/assistant.py

+234
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
# SPDX-FileCopyrightText: 2023 Melissa LeBlanc-Williams for Adafruit Industries
2+
#
3+
# SPDX-License-Identifier: MIT
4+
5+
import subprocess
6+
7+
import argparse
8+
import os
9+
import sys
10+
11+
from datetime import datetime, timedelta
12+
from queue import Queue
13+
from time import sleep
14+
from tempfile import NamedTemporaryFile
15+
16+
import speech_recognition as sr
17+
import openai
18+
19+
# Add your OpenAI API key here
20+
openai.api_key = "sk-..."
21+
SYSTEM_ROLE = (
22+
"You are a helpful voice assistant that answers questions and gives information"
23+
)
24+
25+
def speak(text):
    """Speak *text* aloud through the espeak-ng text-to-speech engine.

    Blocks until espeak-ng exits.  ``check=False`` keeps a failed or
    missing espeak-ng from raising ``CalledProcessError`` and crashing
    the assistant loop.
    """
    # Bug fix: the original passed "&" as an extra argv element.  That is
    # shell job-control syntax and does nothing in an argument list passed
    # to subprocess.run (shell=False) — espeak-ng just receives a stray
    # "&" argument.  Dropped.
    subprocess.run(["espeak-ng", text], check=False)
27+
28+
29+
def sendchat(prompt):
    """Send *prompt* to ChatGPT and return the assistant's reply text."""
    conversation = [
        {"role": "system", "content": SYSTEM_ROLE},
        {"role": "user", "content": prompt},
    ]
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    # Only one completion choice is requested, so the first is the answer.
    return response.choices[0].message.content
39+
40+
41+
def transcribe(wav_data):
    """Send recorded WAV bytes to OpenAI Whisper and return the text."""
    print("Transcribing...")
    # The raw-upload helper needs a filename with a recognized audio
    # extension; the temp file exists only to provide one.
    # NOTE(review): translate_raw translates speech *into English*; if
    # same-language transcription is intended, transcribe_raw may be the
    # better call — confirm against the OpenAI Audio API docs.
    with NamedTemporaryFile(suffix=".wav") as audio_file:
        response = openai.Audio.translate_raw("whisper-1", wav_data, audio_file.name)
    return response["text"].strip()
47+
48+
49+
class Listener:
    """Captures microphone audio via speech_recognition into a thread-safe queue.

    Audio arrives either synchronously (``listen``) or from a background
    thread (``start`` / ``record_callback``); ``get_audio_data`` drains the
    queue and stitches the chunks into a single ``sr.AudioData`` phrase,
    using ``phrase_timeout`` seconds of silence to decide when a phrase is
    complete.
    """

    def __init__(
        self, default_microphone, record_timeout, energy_threshold, phrase_timeout
    ):
        # Handle returned by listen_in_background (a stopper callable);
        # None while no background listener is running.
        self.listener_handle = None
        self.recorder = sr.Recognizer()
        # Max seconds per background recording chunk (phrase_time_limit).
        self.record_timeout = record_timeout
        self.recorder.energy_threshold = energy_threshold
        # Fixed threshold: dynamic adjustment would lower it over time.
        self.recorder.dynamic_energy_threshold = False
        self.recorder.pause_threshold = 1
        self.source = None
        # Raw audio bytes accumulated for the phrase currently in progress.
        self.last_sample = bytes()
        # Timestamp of the most recent audio retrieval (naive UTC).
        self.phrase_time = datetime.utcnow()
        self.phrase_timeout = phrase_timeout
        self.phrase_complete = False
        self.default_microphone = default_microphone
        # Thread safe Queue for passing data from the threaded recording callback.
        self.data_queue = Queue()
        # Resolve (and calibrate) the microphone eagerly; exits the
        # process if no matching device is found.
        self.source = self._get_microphone()

    def _get_microphone(self):
        """Return the cached microphone, or find one matching ``default_microphone``.

        With no name (or the literal "list"), prints available devices and
        exits.  On first resolution, also calibrates the recognizer for
        ambient noise.
        """
        if self.source:
            return self.source
        mic_name = self.default_microphone
        source = None
        if not mic_name or mic_name == "list":
            print("Available microphone devices are: ")
            for index, name in enumerate(sr.Microphone.list_microphone_names()):
                print(f'Microphone with name "{name}" found')
            sys.exit()
        else:
            # Substring match against the device name; first hit wins.
            for index, name in enumerate(sr.Microphone.list_microphone_names()):
                if mic_name in name:
                    print(f'Microphone with name "{name}" at index "{index}" found')
                    source = sr.Microphone(sample_rate=16000, device_index=index)
                    break
        if not source:
            print(f'Microphone with name "{mic_name}" not found')
            sys.exit()

        # Entering the context opens the audio stream for calibration.
        with source:
            self.recorder.adjust_for_ambient_noise(source)

        return source

    def listen(self):
        """Synchronously record one phrase and queue its raw bytes.

        No-op while a background listener is active, to avoid two readers
        on the same microphone.
        """
        if not self.listener_handle:
            with self._get_microphone() as source:
                audio = self.recorder.listen(source)
            data = audio.get_raw_data()
            self.data_queue.put(data)

    def start(self):
        """Start recording in a background thread (idempotent)."""
        if not self.listener_handle:
            self.listener_handle = self.recorder.listen_in_background(
                self._get_microphone(),
                self.record_callback,
                phrase_time_limit=self.record_timeout,
            )

    def stop(self, wait_for_stop: bool = False):
        """Stop the background listener.

        ``listener_handle`` is the stopper callable returned by
        ``listen_in_background``.  NOTE(review): calling stop() before
        start() would raise TypeError (handle is None) — callers must pair
        them.
        """
        self.listener_handle(wait_for_stop=wait_for_stop)
        self.listener_handle = None

    def record_callback(self, _, audio: sr.AudioData) -> None:
        """
        Threaded callback function to receive audio data when recordings finish.
        audio: An AudioData containing the recorded bytes.
        """
        # Grab the raw bytes and push it into the thread safe queue.
        data = audio.get_raw_data()
        self.data_queue.put(data)

    def speech_waiting(self):
        """Return True if recorded audio is waiting in the queue."""
        return not self.data_queue.empty()

    def get_speech(self):
        """Pop one raw-audio chunk from the queue, or None if empty."""
        if self.speech_waiting():
            return self.data_queue.get()
        return None

    def get_audio_data(self):
        """Drain queued chunks and return them as one ``sr.AudioData``.

        If more than ``phrase_timeout`` seconds elapsed since the previous
        call, the running sample is reset and ``phrase_complete`` is set so
        the caller knows a new phrase started.  Returns None when no audio
        is queued.
        """
        now = datetime.utcnow()
        if self.speech_waiting():
            self.phrase_complete = False
            # Enough silence since the last retrieval => previous phrase
            # ended; start accumulating a fresh one.
            if self.phrase_time and now - self.phrase_time > timedelta(
                seconds=self.phrase_timeout
            ):
                self.last_sample = bytes()
                self.phrase_complete = True
            self.phrase_time = now

            # Concatenate our current audio data with the latest audio data.
            while self.speech_waiting():
                data = self.get_speech()
                self.last_sample += data

            source = self._get_microphone()

            # Use AudioData to convert the raw data to wav data.
            return sr.AudioData(
                self.last_sample, source.SAMPLE_RATE, source.SAMPLE_WIDTH
            )
        return None
153+
154+
155+
def main():
    """Entry point: record speech, transcribe it, query ChatGPT, speak the reply.

    Runs until interrupted with Ctrl-C, then prints the full transcription.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--energy_threshold",
        default=1000,
        help="Energy level for mic to detect.",
        type=int,
    )
    parser.add_argument(
        "--record_timeout",
        default=2,
        help="How real time the recording is in seconds.",
        type=float,
    )
    parser.add_argument(
        "--phrase_timeout",
        default=3,
        help="How much empty space between recordings before we "
        "consider it a new line in the transcription.",
        type=float,
    )
    parser.add_argument(
        "--default_microphone",
        default="pulse",
        help="Default microphone name for SpeechRecognition. "
        "Run this with 'list' to view available Microphones.",
        type=str,
    )
    args = parser.parse_args()

    listener = Listener(
        args.default_microphone,
        args.record_timeout,
        args.energy_threshold,
        args.phrase_timeout,
    )

    # Rolling transcription; the last entry is overwritten while a phrase
    # is still partial, and appended to once it completes.
    transcription = [""]

    print("How may I help you?")
    speak("How may I help you?")

    while True:
        try:
            # Block until one phrase is recorded and queued.
            listener.listen()
            if listener.speech_waiting():
                audio_data = listener.get_audio_data()
                # get_audio_data() returns None when the queue was drained
                # in the meantime; skip explicitly rather than relying on
                # the AttributeError handler below to catch the fallout.
                if audio_data is None:
                    continue
                text = transcribe(audio_data.get_wav_data())

                if text:
                    if listener.phrase_complete:
                        transcription.append(text)
                        print(f"Phrase Complete. Sent '{text}' to ChatGPT.")
                        chat_response = sendchat(text)
                        transcription.append(f"> {chat_response}")
                        print("Got response from ChatGPT. Beginning speech synthesis.")
                        speak(chat_response)
                        print("Done speaking.")
                    else:
                        print("Partial Phrase...")
                        # Still mid-phrase: replace the provisional line.
                        transcription[-1] = text

                # Redraw the running transcription.
                os.system("clear")
                for line in transcription:
                    print(line)
                print("", end="", flush=True)
                sleep(0.25)
        except (AssertionError, AttributeError):
            # Deliberate best-effort: transient audio/SDK hiccups should
            # not kill the loop.  TODO(review): consider logging these.
            pass
        except KeyboardInterrupt:
            break

    print("\n\nTranscription:")
    for line in transcription:
        print(line)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)