-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.js
217 lines (203 loc) · 7.62 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
// Import the web socket library
const WebSocket = require("ws");
// Load the .env file into memory so the code has access to the key
const dotenv = require("dotenv");
dotenv.config();
const Speaker = require("speaker");
const record = require("node-record-lpcm16");
// Function to start recording audio
function startRecording() {
return new Promise((resolve, reject) => {
console.log("Speak to send a message to the assistant. Press Enter when done.");
// Create a buffer to hold the audio data
const audioData = [];
// Start recording in PCM16 format
const recordingStream = record.record({
sampleRate: 16000, // 16kHz sample rate (standard for speech recognition)
threshold: 0, // Start recording immediately
verbose: false,
recordProgram: "sox", // Specify the program
});
// Capture audio data
recordingStream.stream().on("data", (chunk) => {
audioData.push(chunk); // Store the audio chunks
});
// Handle errors in the recording stream
recordingStream.stream().on("error", (err) => {
console.error("Error in recording stream:", err);
reject(err);
});
// Set up standard input to listen for the Enter key press
process.stdin.resume(); // Start listening to stdin
process.stdin.on("data", () => {
console.log("Recording stopped.");
recordingStream.stop(); // Correctly stop the recording stream
process.stdin.pause(); // Stop listening to stdin
// Convert audio data to a single Buffer
const audioBuffer = Buffer.concat(audioData);
// Convert the Buffer to Base64
const base64Audio = audioBuffer.toString("base64");
resolve(base64Audio); // Resolve the promise with Base64 audio
});
});
}
const functions = {
calculate_sum: (args) => args.a + args.b,
set_memory: (args) => console.log(`Saving memory ${args}`)
}
const sumTool = {
type: "function",
name: "calculate_sum",
description: "Use this function when asked to add numbers together, for example when asked 'What's 4 + 6'?.",
parameters: {
type: "object",
properties: {
"a": { "type": "number" },
"b": { "type": "number" }
},
required: ["a", "b"]
}
}
const setMemory = {
type: "function",
name: 'set_memory',
description: 'Saves important data about the user into memory.',
parameters: {
type: 'object',
properties: {
key: {
type: 'string',
description:
'The key of the memory value. Always use lowercase and underscores, no other characters.',
},
value: {
type: 'string',
description: 'Value can be anything represented as a string',
},
},
required: ['key', 'value'],
},
}
// Add to the main() function after ws is initialized
const speaker = new Speaker({
channels: 1, // Mono or Stereo
bitDepth: 16, // PCM16 (16-bit audio)
sampleRate: 24000, // Common sample rate (44.1kHz)
});
function main() {
// Connect to the API
const url = "wss://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview-2024-10-01";
const ws = new WebSocket(url, {
headers: {
"Authorization": "Bearer " + process.env.OPENAI_API_KEY,
"OpenAI-Beta": "realtime=v1",
},
});
// Add inside the main() function of index.js after creating ws
async function handleOpen() {
console.log("Connection is opened");
// For text based conversations
// const createConversationEvent = {
// type: "conversation.item.create",
// item: {
// type: "message",
// role: "user",
// content: [
// {
// type: "input_text",
// text: "Explain in one sentence what a web socket is"
// }
// ]
// }
// };
// For audio based conversations input
const base64AudioData = await startRecording();
const createConversationEvent = {
type: "conversation.item.create",
item: {
type: "message",
role: "user",
content: [
{
type: "input_audio",
audio: base64AudioData,
},
],
},
};
ws.send(JSON.stringify(createConversationEvent));
const createResponseEvent = {
type: "response.create",
response: {
modalities: ["text", "audio"],
instructions: "Please assist with the user's question and make friendly conversation with them. If they ask you a question first retrieve their stored memories before answering or asking them for more details. Also store anything memorable that they said about themselves like their likes or dislikes, their preferences or any interesting event that happened in their life'",
tools: [sumTool, setMemory], // New for tool calling
tool_choice: "auto", // New
}
}
ws.send(JSON.stringify(createResponseEvent));
}
ws.on("open", handleOpen);
async function handleMessage(messageStr) {
const message = JSON.parse(messageStr);
// Define what happens when a message is received
console.log(`-- ${message.type} --`);
switch(message.type) {
case "response.text.delta":
// We got a new text chunk, print it
process.stdout.write(message.delta);
break;
case "response.audio.delta":
// We got a new audio chunk
const base64AudioChunk = message.delta;
const audioBuffer = Buffer.from(base64AudioChunk, "base64");
speaker.write(audioBuffer);
break;
case "response.function_call_arguments.done":
console.log(`Using function ${message.name} with arguments ${message.arguments}`);
// 1. Get the function information and call the function
const function_name = message.name;
const function_arguments = JSON.parse(message.arguments);
const result = functions[function_name](function_arguments);
console.log(`Got result ${result}`);
// 2. Send the result of the function call
const functionOutputEvent = {
type: "conversation.item.create",
item: {
type: "function_call_output",
role: "system",
output: `${result}`,
}
};
ws.send(JSON.stringify(functionOutputEvent));
// 3. Request a response
ws.send(JSON.stringify({type: "response.create"}));
break;
case "response.output_item.done":
console.log(message);
break;
case "error":
console.log(message);
break;
case "response.text.done":
// The text is complete, print a new line
process.stdout.write("\n");
break;
case "response.audio.done":
// Response complete, close the socket
speaker.end()
ws.close();
break;
}
}
ws.on("message", handleMessage);
async function handleClose() {
console.log("Socket closed");
}
ws.on("close", handleClose);
async function handleError(error) {
console.log("Error", error);
}
ws.on("error", handleError);
}
main();