//
// This script connects to the OpenAI Realtime API to create a voice-based assistant.
//
// It captures audio input from your microphone, sends it to the OpenAI API for processing,
// and plays back the assistant's audio response through your speakers.
//
// **How to Run on a Mac:**
//
// 1. **Install Dependencies:**
//    - Ensure you have Node.js and npm installed.
//    - Run `npm init -y`, then install the required packages:
//      `npm install mic speaker dotenv github:openai/openai-realtime-api-beta`
//      (the realtime client is installed from GitHub while the package is in beta).
//    - The `mic` package records through SoX on macOS, so you may also need to
//      run `brew install sox`.
//
// 2. **Set Up Environment Variables:**
//    - Create a `.env` file in the same directory as this script.
//    - Add your OpenAI API key to the `.env` file:
//      ```
//      OPENAI_API_KEY=your_api_key_here
//      ```
//
// 3. **Run the Script:**
//    - Execute the script with the command `node node_devenv.mjs`.
//
// **Note:** Make sure your microphone and speakers are properly configured and accessible on your Mac.
//
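// For reference, a minimal package.json sketch. The dependency names match the
// imports below; the GitHub specifier for the beta client and the wildcard
// versions are illustrative assumptions, not pinned requirements:
//
// {
//   "dependencies": {
//     "@openai/realtime-api-beta": "github:openai/openai-realtime-api-beta",
//     "mic": "*",
//     "speaker": "*",
//     "dotenv": "*"
//   }
// }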

import { RealtimeClient } from '@openai/realtime-api-beta';
import mic from 'mic';
import { Readable } from 'stream';
import Speaker from 'speaker';
import dotenv from 'dotenv';

dotenv.config();

const API_KEY = process.env.OPENAI_API_KEY;

if (!API_KEY) {
  console.error('Please set your OPENAI_API_KEY in your environment variables.');
  process.exit(1);
}

const client = new RealtimeClient({
  apiKey: API_KEY,
  model: 'gpt-4o-realtime-preview-2024-10-01',
});
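
// Optional session tuning, shown here as a commented-out sketch. These options
// belong to the realtime-api-beta client's `updateSession()`; the values are
// illustrative assumptions. `turn_detection: null` disables server-side voice
// activity detection, which matches this script's manual `createResponse()`
// call on mic silence (see below).
//
// client.updateSession({
//   instructions: 'You are a helpful voice assistant. Keep replies brief.',
//   voice: 'alloy',
//   input_audio_format: 'pcm16',
//   output_audio_format: 'pcm16',
//   turn_detection: null,
// });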

let micInstance;
let speaker;

async function main() {
  try {
    console.log('Attempting to connect...');
    await client.connect();
    console.log('Connection established successfully.');
    startAudioStream();
  } catch (error) {
    console.error('Error connecting to OpenAI Realtime API:', error);
    console.log('Connection attempt failed. Retrying in 5 seconds...');
    setTimeout(main, 5000);
  }
}

main();
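
// Clean shutdown on Ctrl+C: stop the mic and close the connection. A minimal
// sketch; `stop()` and `disconnect()` are the teardown calls exposed by the
// `mic` and realtime-api-beta packages respectively.
process.on('SIGINT', () => {
  console.log('Shutting down...');
  if (micInstance) micInstance.stop();
  client.disconnect();
  process.exit(0);
});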

client.on('conversation.item.completed', ({ item }) => {
  console.log('Conversation item completed:', item);

  if (item.type === 'message' && item.role === 'assistant' && item.formatted && item.formatted.audio) {
    console.log('Playing audio response...');
    playAudio(item.formatted.audio);
  } else {
    console.log('No audio content in this item.');
  }
});
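
// Log client-side errors as they arrive. 'error' is part of the
// realtime-api-beta client's documented event set; this handler is a
// minimal sketch that only logs.
client.on('error', (event) => {
  console.error('Realtime API error:', event);
});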

// BEGIN MANAGE Mac AUDIO INTERFACES

function startAudioStream() {
  try {
    micInstance = mic({
      rate: '24000',
      channels: '1',
      debug: false,
      exitOnSilence: 6,
      fileType: 'raw',
      encoding: 'signed-integer',
    });

    const micInputStream = micInstance.getAudioStream();

    micInputStream.on('error', (error) => {
      console.error('Microphone error:', error);
    });

    micInstance.start();
    console.log('Microphone started streaming.');

    let audioBuffer = Buffer.alloc(0);
    const chunkSize = 4800; // 4800 bytes = 2400 samples = 0.1 s of 16-bit mono audio at 24 kHz

    micInputStream.on('data', (data) => {
      audioBuffer = Buffer.concat([audioBuffer, data]);

      // Forward audio to the API in fixed-size chunks.
      while (audioBuffer.length >= chunkSize) {
        const chunk = audioBuffer.slice(0, chunkSize);
        audioBuffer = audioBuffer.slice(chunkSize);

        // Reinterpret the chunk's raw bytes as 16-bit PCM samples without copying.
        const int16Array = new Int16Array(chunk.buffer, chunk.byteOffset, chunk.length / 2);

        try {
          client.appendInputAudio(int16Array);
        } catch (error) {
          console.error('Error sending audio data:', error);
        }
      }
    });

    micInputStream.on('silence', () => {
      console.log('Silence detected, creating response...');
      try {
        client.createResponse();
      } catch (error) {
        console.error('Error creating response:', error);
      }
    });
  } catch (error) {
    console.error('Error starting audio stream:', error);
  }
}

function playAudio(audioData) {
  try {
    if (!speaker) {
      speaker = new Speaker({
        channels: 1,
        bitDepth: 16,
        sampleRate: 24000,
      });

      // Recreate the speaker for the next playback once this one closes.
      // Attach the listener only when the speaker is created, so repeated
      // calls don't stack duplicate 'close' handlers.
      speaker.on('close', () => {
        console.log('Speaker closed. Recreating for next playback.');
        speaker = null;
      });
    }

    // Convert the Int16Array view to a Buffer over the same bytes, honoring
    // its offset and length within the underlying ArrayBuffer.
    const buffer = Buffer.from(audioData.buffer, audioData.byteOffset, audioData.byteLength);

    // Create a readable stream from the buffer
    const readableStream = new Readable({
      read() {
        this.push(buffer);
        this.push(null);
      },
    });

    // Pipe the stream to the speaker
    readableStream.pipe(speaker);
    console.log('Audio sent to speaker for playback. Buffer length:', buffer.length);
  } catch (error) {
    console.error('Error playing audio:', error);
  }
}

// END MANAGE AUDIO INTERFACES