-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathjarvis.py
More file actions
173 lines (144 loc) · 5.98 KB
/
jarvis.py
File metadata and controls
173 lines (144 loc) · 5.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
import datetime
import os
import io
import numpy as np
import sounddevice as sd
import soundfile as sf
from openai import OpenAI
from dotenv import load_dotenv
import pyautogui
import time
# Load OPENAI_API_KEY from .env file
load_dotenv()
client = OpenAI()
def start_recording(): # Function to start recording audio
global recording, frames_list
print("Recording started. Press Enter to stop.")
frames_list = []
recording = True
sd.default.samplerate = 44100 # Sample rate
sd.default.channels = 1 # Number of audio channels
def callback(indata, frames, time, status):
frames_list.append(indata.copy())
with sd.InputStream(callback=callback):
input() # Wait for user to press Enter
stop_recording()
def stop_recording(): # Function to stop recording audio, save audio + call next function
global recording, desktop_path
print("Recording stopped.")
recording = False
sd.stop()
# Concatenate recorded frames
full_audio = np.concatenate(frames_list, axis=0)
# Save audio to desktop
now = datetime.datetime.now()
filename = f"audio_{now.strftime('%Y-%m-%d_%H-%M-%S')}.wav"
desktop_path = os.path.join(os.path.expanduser("~"), "Desktop")
file_path = os.path.join(desktop_path, filename)
# Use soundfile.write to save the audio
sf.write(file_path, full_audio, 44100)
print(f"Audio saved to {file_path}")
new_transcription = get_transcription(file_path)
print("Transcription: ", new_transcription)
gpt_response = ask_gpt(new_transcription)
print("GPT Response: ", gpt_response)
execute_commands(gpt_response)
def execute_commands(gpt_response):
lines = gpt_response.splitlines()
if (lines[0] == "whatsapp"):
send_whatsapp(lines[1], lines[2])
response_text = "I sent a WhatsApp to " + \
lines[1] + " saying " + lines[2]
respond_to_user(response_text)
if (lines[0] == "notes"):
write_note_in_apple_notes(lines[1], lines[2])
respond_to_user("I have taken a note.")
if (lines[0] == "question"):
text_after_question = gpt_response.split("question", 1)[-1].strip()
respond_to_user(text_after_question)
print("Press Enter to start recording")
def get_transcription(filename):
audio_file = open(filename, "rb")
transcript = client.audio.transcriptions.create(
model="whisper-1",
file=audio_file
)
return transcript.text
def ask_gpt(transcription):
"""Ask a question to GPT."""
instruction_prompt = '''You are a digital assistant system that only supports 3 functions - taking notes for the user, sending WhatsApp messages and answering questions. You will interpret the users's request and always start your response with either 'notes', 'whatsapp' or 'question' depending on what the user requests you to do. Then you will return a newline. If the user requests you take a note you will then return an appropriate title for the note followed by a newline and then the contents of the note. If the user requested to send a WhatsApp you will return the first name of the intended recipient followed by a newline and then the contents of the message body as it if was written in the first-person by the user with an appropriate emoji at the end. If the user just asked a general question like research or maths or fact just write a suitable answer based on your knowledge. You will always answer in this fashion. An example request might be "Please WhatsApp Mitch Williams and tell him I want to have fish for dinner as well as maybe some uh of that uh potato mash we had the other night" and you will respond with:
whatsapp
Mitch
Hey, please can we have fish and some of that leftover potato mash for dinner.💪🏼.
Thanks, now please answer the following user's request: '''
response = client.chat.completions.create(
model="gpt-4-0125-preview",
messages=[{"role": "system", "content": instruction_prompt},
{"role": "user", "content": transcription}]
)
return response.choices[0].message.content
def write_note_in_apple_notes(note_title, note_body):
"""Write a note in Apple Notes."""
# open notes
pyautogui.keyDown('command')
pyautogui.press('space')
pyautogui.keyUp('command')
pyautogui.typewrite("notes")
pyautogui.press('enter')
time.sleep(3)
# Create a new note
pyautogui.keyDown('command')
pyautogui.press('n')
pyautogui.keyUp('command')
pyautogui.typewrite(note_title)
pyautogui.press('enter')
pyautogui.typewrite(note_body)
# Close Notes
pyautogui.keyDown('command')
pyautogui.press('w')
pyautogui.keyUp('command')
def send_whatsapp(recipient, whatsapp_message):
"""Send a message to someone in Whatsapp Web"""
# open whatsapp
pyautogui.keyDown('command')
pyautogui.press('space')
pyautogui.keyUp('command')
pyautogui.typewrite("whatsapp")
pyautogui.press('enter')
time.sleep(1)
# Search for recipient
pyautogui.keyDown('command')
pyautogui.press('f')
pyautogui.keyUp('command')
pyautogui.typewrite(recipient)
pyautogui.press('tab')
time.sleep(0.5)
pyautogui.press('tab')
pyautogui.press('enter')
pyautogui.typewrite(whatsapp_message)
pyautogui.press('enter')
# Close whatsapp
pyautogui.keyDown('command')
pyautogui.press('w')
pyautogui.keyUp('command')
def respond_to_user(text_response): # tell the user audibly what you did
response = client.audio.speech.create(
model="tts-1",
voice="shimmer",
input=text_response
)
audio_stream = response.content
audio_data, samplerate = sf.read(io.BytesIO(audio_stream), dtype='float32')
sd.play(audio_data, samplerate)
sd.wait() # Wait until the audio is finished playing
# Main loop
recording = False
frames_list = []
desktop_path = ""
print("Press Enter to start recording")
while True:
input() # Wait for user to press Enter
if not recording:
start_recording()
else:
stop_recording()