diff --git a/.gitignore b/.gitignore index 62c34036f3..61406464fa 100644 --- a/.gitignore +++ b/.gitignore @@ -10,11 +10,11 @@ __pycache__/ .Python build/ develop-eggs/ -dist/ +/dist/ downloads/ eggs/ .eggs/ -lib/ +/lib/ lib64 parts/ sdist/ diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api.mdx b/examples/voice_solutions/one_way_translation_using_realtime_api.mdx index ce833b5855..82f841186d 100644 --- a/examples/voice_solutions/one_way_translation_using_realtime_api.mdx +++ b/examples/voice_solutions/one_way_translation_using_realtime_api.mdx @@ -7,8 +7,7 @@ This cookbook demonstrates how to use OpenAI's [ Realtime API](https://platform. A real-world use case for this demo is a multilingual, conversational translation where a speaker talks into the speaker app and listeners hear translations in their selected native language via the listener app. Imagine a conference room with a speaker talking in English and a participant with headphones in choosing to listen to a Tagalog translation. Due to the current turn-based nature of audio models, the speaker must pause briefly to allow the model to process and translate speech. However, as models become faster and more efficient, this latency will decrease significantly and the translation will become more seamless. -Let's explore the main functionalities and code snippets that illustrate how the app works. You can find the code in the [accompanying repo](https://github.com/openai/openai-cookbook/tree/main/examples/voice_solutions/one_way_translation_using_realtime_api/README.md -) if you want to run the app locally. +Let's explore the main functionalities and code snippets that illustrate how the app works. You can find the code in the [accompanying repo](https://github.com/openai/openai-cookbook/tree/main/examples/voice_solutions/one_way_translation_using_realtime_api) if you want to run the app locally. 
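Before diving into the individual snippets, here is a minimal sketch of how the relay server added in this PR might be started. It is illustrative only, not the repo's exact bootstrap code: the file name (`relay-server/index.js`), the default port, and the `OPENAI_API_KEY`/`PORT` environment-variable handling are assumptions; only the `RealtimeRelay` class itself comes from the diff below.

```js
// Hypothetical relay-server entry point (e.g. relay-server/index.js).
// Assumes OPENAI_API_KEY is exported in the shell before starting.
import { RealtimeRelay } from './lib/relay.js';

const apiKey = process.env.OPENAI_API_KEY;
if (!apiKey) {
  console.error('Set OPENAI_API_KEY before starting the relay server.');
  process.exit(1);
}

// Default to 8081 unless PORT is provided (both values are assumptions).
const port = Number.parseInt(process.env.PORT ?? '', 10) || 8081;
new RealtimeRelay(apiKey).listen(port);
```

The speaker and listener pages would then connect to `ws://localhost:<port>` and exchange Realtime API events through the relay.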
## High Level Architecture Overview diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api/relay-server/lib/relay.js b/examples/voice_solutions/one_way_translation_using_realtime_api/relay-server/lib/relay.js new file mode 100644 index 0000000000..ef444146d7 --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/relay-server/lib/relay.js @@ -0,0 +1,84 @@ +import { WebSocketServer } from 'ws'; +import { RealtimeClient } from '@openai/realtime-api-beta'; + +export class RealtimeRelay { + constructor(apiKey) { + this.apiKey = apiKey; + this.sockets = new WeakMap(); + this.wss = null; + } + + listen(port) { + this.wss = new WebSocketServer({ port }); + this.wss.on('connection', this.connectionHandler.bind(this)); + this.log(`Listening on ws://localhost:${port}`); + } + + async connectionHandler(ws, req) { + if (!req.url) { + this.log('No URL provided, closing connection.'); + ws.close(); + return; + } + + const url = new URL(req.url, `http://${req.headers.host}`); + const pathname = url.pathname; + + if (pathname !== '/') { + this.log(`Invalid pathname: "${pathname}"`); + ws.close(); + return; + } + + // Instantiate new client + this.log(`Connecting with key "${this.apiKey.slice(0, 3)}..."`); + const client = new RealtimeClient({ apiKey: this.apiKey }); + + // Relay: OpenAI Realtime API Event -> Browser Event + client.realtime.on('server.*', (event) => { + this.log(`Relaying "${event.type}" to Client`); + ws.send(JSON.stringify(event)); + }); + client.realtime.on('close', () => ws.close()); + + // Relay: Browser Event -> OpenAI Realtime API Event + // We need to queue data waiting for the OpenAI connection + const messageQueue = []; + const messageHandler = (data) => { + try { + const event = JSON.parse(data); + this.log(`Relaying "${event.type}" to OpenAI`); + client.realtime.send(event.type, event); + } catch (e) { + console.error(e.message); + this.log(`Error parsing event from client: ${data}`); + } + }; + ws.on('message', (data) => { + if (!client.isConnected()) { + messageQueue.push(data); + } else { + messageHandler(data); + } + }); + ws.on('close', () => client.disconnect()); + + // Connect to OpenAI Realtime API + try { + this.log(`Connecting to OpenAI...`); + await client.connect(); + } catch (e) { + this.log(`Error connecting to OpenAI: ${e.message}`); + ws.close(); + return; + } + this.log(`Connected to OpenAI successfully!`); + while (messageQueue.length) { + messageHandler(messageQueue.shift()); + } + } + + log(...args) { + console.log(`[RealtimeRelay]`, ...args); + } +} diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/index.d.ts b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/index.d.ts new file mode 100644 index 0000000000..952953208e --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/index.d.ts @@ -0,0 +1,6 @@ +import { AudioAnalysis } from './lib/analysis/audio_analysis.js'; +import { WavPacker } from './lib/wav_packer.js'; +import { WavStreamPlayer } from './lib/wav_stream_player.js'; +import { WavRecorder } from './lib/wav_recorder.js'; +export { AudioAnalysis, WavPacker, WavStreamPlayer, WavRecorder }; +//# sourceMappingURL=index.d.ts.map \ No newline at end of file diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/index.d.ts.map b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/index.d.ts.map 
new file mode 100644 index 0000000000..a80c055fdc --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/index.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.js"],"names":[],"mappings":"8BAC8B,kCAAkC;0BADtC,qBAAqB;gCAEf,4BAA4B;4BAChC,uBAAuB"} \ No newline at end of file diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/analysis/audio_analysis.d.ts b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/analysis/audio_analysis.d.ts new file mode 100644 index 0000000000..fc50758964 --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/analysis/audio_analysis.d.ts @@ -0,0 +1,70 @@ +/** + * Output of AudioAnalysis for the frequency domain of the audio + * @typedef {Object} AudioAnalysisOutputType + * @property {Float32Array} values Amplitude of this frequency between {0, 1} inclusive + * @property {number[]} frequencies Raw frequency bucket values + * @property {string[]} labels Labels for the frequency bucket values + */ +/** + * Analyzes audio for visual output + * @class + */ +export class AudioAnalysis { + /** + * Retrieves frequency domain data from an AnalyserNode adjusted to a decibel range + * returns human-readable formatting and labels + * @param {AnalyserNode} analyser + * @param {number} sampleRate + * @param {Float32Array} [fftResult] + * @param {"frequency"|"music"|"voice"} [analysisType] + * @param {number} [minDecibels] default -100 + * @param {number} [maxDecibels] default -30 + * @returns {AudioAnalysisOutputType} + */ + static getFrequencies(analyser: AnalyserNode, sampleRate: number, fftResult?: Float32Array, analysisType?: "frequency" | "music" | "voice", minDecibels?: number, maxDecibels?: number): AudioAnalysisOutputType; + /** + * Creates a new AudioAnalysis instance for an HTMLAudioElement + * @param {HTMLAudioElement} audioElement + * @param {AudioBuffer|null} [audioBuffer] If provided, will cache all frequency domain data from the buffer + * @returns {AudioAnalysis} + */ + constructor(audioElement: HTMLAudioElement, audioBuffer?: AudioBuffer | null); + fftResults: any[]; + audio: HTMLAudioElement; + context: any; + analyser: any; + sampleRate: any; + audioBuffer: any; + /** + * Gets the current frequency domain data from the playing audio track + * @param {"frequency"|"music"|"voice"} [analysisType] + * @param {number} [minDecibels] default -100 + * @param {number} [maxDecibels] default -30 + * @returns {AudioAnalysisOutputType} + */ + getFrequencies(analysisType?: "frequency" | "music" | "voice", minDecibels?: number, maxDecibels?: number): AudioAnalysisOutputType; + /** + * Resume the internal AudioContext if it was suspended due to the lack of + * user interaction when the AudioAnalysis was instantiated. 
+ * @returns {Promise} + */ + resumeIfSuspended(): Promise; +} +/** + * Output of AudioAnalysis for the frequency domain of the audio + */ +export type AudioAnalysisOutputType = { + /** + * Amplitude of this frequency between {0, 1} inclusive + */ + values: Float32Array; + /** + * Raw frequency bucket values + */ + frequencies: number[]; + /** + * Labels for the frequency bucket values + */ + labels: string[]; +}; +//# sourceMappingURL=audio_analysis.d.ts.map \ No newline at end of file diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/analysis/audio_analysis.d.ts.map b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/analysis/audio_analysis.d.ts.map new file mode 100644 index 0000000000..abb292bd75 --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/analysis/audio_analysis.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"audio_analysis.d.ts","sourceRoot":"","sources":["../../../lib/analysis/audio_analysis.js"],"names":[],"mappings":"AAOA;;;;;;GAMG;AAEH;;;GAGG;AACH;IACE;;;;;;;;;;OAUG;IACH,gCARW,YAAY,cACZ,MAAM,cACN,YAAY,iBACZ,WAAW,GAAC,OAAO,GAAC,OAAO,gBAC3B,MAAM,gBACN,MAAM,GACJ,uBAAuB,CAwDnC;IAED;;;;;OAKG;IACH,0BAJW,gBAAgB,gBAChB,WAAW,GAAC,IAAI,EAkE1B;IA9DC,kBAAoB;IA2ClB,wBAAyB;IACzB,aAAkC;IAClC,cAAwB;IACxB,gBAA4B;IAC5B,iBAA8B;IAiBlC;;;;;;OAMG;IACH,8BALW,WAAW,GAAC,OAAO,GAAC,OAAO,gBAC3B,MAAM,gBACN,MAAM,GACJ,uBAAuB,CAwBnC;IAED;;;;OAIG;IACH,qBAFa,OAAO,CAAC,IAAI,CAAC,CAOzB;CACF;;;;;;;;YA9La,YAAY;;;;iBACZ,MAAM,EAAE;;;;YACR,MAAM,EAAE"} \ No newline at end of file diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/analysis/constants.d.ts b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/analysis/constants.d.ts new file mode 100644 index 0000000000..868ba1593e --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/analysis/constants.d.ts @@ -0,0 +1,9 @@ +/** + * All note frequencies from 1st to 8th octave + * in format "A#8" (A#, 8th octave) + */ +export const noteFrequencies: any[]; +export const noteFrequencyLabels: any[]; +export const voiceFrequencies: any[]; +export const voiceFrequencyLabels: any[]; +//# sourceMappingURL=constants.d.ts.map \ No newline at end of file diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/analysis/constants.d.ts.map b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/analysis/constants.d.ts.map new file mode 100644 index 0000000000..0f5d851092 --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/analysis/constants.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"constants.d.ts","sourceRoot":"","sources":["../../../lib/analysis/constants.js"],"names":[],"mappings":"AA6BA;;;GAGG;AACH,oCAAkC;AAClC,wCAAsC;AActC,qCAKG;AACH,yCAKG"} \ No newline at end of file diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/wav_packer.d.ts b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/wav_packer.d.ts new file mode 100644 index 0000000000..4fe1187422 --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/wav_packer.d.ts @@ -0,0 +1,58 @@ +/** + * Raw wav audio file contents + * @typedef {Object} 
WavPackerAudioType + * @property {Blob} blob + * @property {string} url + * @property {number} channelCount + * @property {number} sampleRate + * @property {number} duration + */ +/** + * Utility class for assembling PCM16 "audio/wav" data + * @class + */ +export class WavPacker { + /** + * Converts Float32Array of amplitude data to ArrayBuffer in Int16Array format + * @param {Float32Array} float32Array + * @returns {ArrayBuffer} + */ + static floatTo16BitPCM(float32Array: Float32Array): ArrayBuffer; + /** + * Concatenates two ArrayBuffers + * @param {ArrayBuffer} leftBuffer + * @param {ArrayBuffer} rightBuffer + * @returns {ArrayBuffer} + */ + static mergeBuffers(leftBuffer: ArrayBuffer, rightBuffer: ArrayBuffer): ArrayBuffer; + /** + * Packs data into an Int16 format + * @private + * @param {number} size 0 = 1x Int16, 1 = 2x Int16 + * @param {number} arg value to pack + * @returns + */ + private _packData; + /** + * Packs audio into "audio/wav" Blob + * @param {number} sampleRate + * @param {{bitsPerSample: number, channels: Array, data: Int16Array}} audio + * @returns {WavPackerAudioType} + */ + pack(sampleRate: number, audio: { + bitsPerSample: number; + channels: Array; + data: Int16Array; + }): WavPackerAudioType; +} +/** + * Raw wav audio file contents + */ +export type WavPackerAudioType = { + blob: Blob; + url: string; + channelCount: number; + sampleRate: number; + duration: number; +}; +//# sourceMappingURL=wav_packer.d.ts.map \ No newline at end of file diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/wav_packer.d.ts.map b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/wav_packer.d.ts.map new file mode 100644 index 0000000000..96477a971c --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/wav_packer.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"wav_packer.d.ts","sourceRoot":"","sources":["../../lib/wav_packer.js"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH;;;GAGG;AACH;IACE;;;;OAIG;IACH,qCAHW,YAAY,GACV,WAAW,CAWvB;IAED;;;;;OAKG;IACH,gCAJW,WAAW,eACX,WAAW,GACT,WAAW,CASvB;IAED;;;;;;OAMG;IACH,kBAKC;IAED;;;;;OAKG;IACH,iBAJW,MAAM,SACN;QAAC,aAAa,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,KAAK,CAAC,YAAY,CAAC,CAAC;QAAC,IAAI,EAAE,UAAU,CAAA;KAAC,GACtE,kBAAkB,CA6C9B;CACF;;;;;UA3Ga,IAAI;SACJ,MAAM;kBACN,MAAM;gBACN,MAAM;cACN,MAAM"} \ No newline at end of file diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/wav_recorder.d.ts b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/wav_recorder.d.ts new file mode 100644 index 0000000000..03cd269828 --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/wav_recorder.d.ts @@ -0,0 +1,167 @@ +/** + * Decodes audio into a wav file + * @typedef {Object} DecodedAudioType + * @property {Blob} blob + * @property {string} url + * @property {Float32Array} values + * @property {AudioBuffer} audioBuffer + */ +/** + * Records live stream of user audio as PCM16 "audio/wav" data + * @class + */ +export class WavRecorder { + /** + * Decodes audio data from multiple formats to a Blob, url, Float32Array and AudioBuffer + * @param {Blob|Float32Array|Int16Array|ArrayBuffer|number[]} audioData + * @param {number} sampleRate + * @param {number} fromSampleRate + * @returns {Promise} + */ + static decode(audioData: Blob | Float32Array | Int16Array | ArrayBuffer | number[], sampleRate?: 
number, fromSampleRate?: number): Promise; + /** + * Create a new WavRecorder instance + * @param {{sampleRate?: number, outputToSpeakers?: boolean, debug?: boolean}} [options] + * @returns {WavRecorder} + */ + constructor({ sampleRate, outputToSpeakers, debug, }?: { + sampleRate?: number; + outputToSpeakers?: boolean; + debug?: boolean; + }); + scriptSrc: any; + sampleRate: number; + outputToSpeakers: boolean; + debug: boolean; + _deviceChangeCallback: () => Promise; + _devices: any[]; + stream: any; + processor: any; + source: any; + node: any; + recording: boolean; + _lastEventId: number; + eventReceipts: {}; + eventTimeout: number; + _chunkProcessor: () => void; + _chunkProcessorBuffer: { + raw: ArrayBuffer; + mono: ArrayBuffer; + }; + /** + * Logs data in debug mode + * @param {...any} arguments + * @returns {true} + */ + log(...args: any[]): true; + /** + * Retrieves the current sampleRate for the recorder + * @returns {number} + */ + getSampleRate(): number; + /** + * Retrieves the current status of the recording + * @returns {"ended"|"paused"|"recording"} + */ + getStatus(): "ended" | "paused" | "recording"; + /** + * Sends an event to the AudioWorklet + * @private + * @param {string} name + * @param {{[key: string]: any}} data + * @param {AudioWorkletNode} [_processor] + * @returns {Promise<{[key: string]: any}>} + */ + private _event; + /** + * Sets device change callback, remove if callback provided is `null` + * @param {(Array): void|null} callback + * @returns {true} + */ + listenForDeviceChange(callback: any): true; + /** + * Manually request permission to use the microphone + * @returns {Promise} + */ + requestPermission(): Promise; + /** + * List all eligible devices for recording, will request permission to use microphone + * @returns {Promise>} + */ + listDevices(): Promise>; + /** + * Begins a recording session and requests microphone permissions if not already granted + * Microphone recording indicator will appear on browser tab but status will be "paused" + * @param {string} [deviceId] if no device provided, default device will be used + * @returns {Promise} + */ + begin(deviceId?: string): Promise; + analyser: any; + /** + * Gets the current frequency domain data from the recording track + * @param {"frequency"|"music"|"voice"} [analysisType] + * @param {number} [minDecibels] default -100 + * @param {number} [maxDecibels] default -30 + * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType} + */ + getFrequencies(analysisType?: "frequency" | "music" | "voice", minDecibels?: number, maxDecibels?: number): import("./analysis/audio_analysis.js").AudioAnalysisOutputType; + /** + * Pauses the recording + * Keeps microphone stream open but halts storage of audio + * @returns {Promise} + */ + pause(): Promise; + /** + * Start recording stream and storing to memory from the connected audio source + * @param {(data: { mono: Int16Array; raw: Int16Array }) => any} [chunkProcessor] + * @param {number} [chunkSize] chunkProcessor will not be triggered until this size threshold met in mono audio + * @returns {Promise} + */ + record(chunkProcessor?: (data: { + mono: Int16Array; + raw: Int16Array; + }) => any, chunkSize?: number): Promise; + _chunkProcessorSize: number; + /** + * Clears the audio buffer, empties stored recording + * @returns {Promise} + */ + clear(): Promise; + /** + * Reads the current audio stream data + * @returns {Promise<{meanValues: Float32Array, channels: Array}>} + */ + read(): Promise<{ + meanValues: Float32Array; + channels: Array; + }>; + 
/** + * Saves the current audio stream to a file + * @param {boolean} [force] Force saving while still recording + * @returns {Promise} + */ + save(force?: boolean): Promise; + /** + * Ends the current recording session and saves the result + * @returns {Promise} + */ + end(): Promise; + /** + * Performs a full cleanup of WavRecorder instance + * Stops actively listening via microphone and removes existing listeners + * @returns {Promise} + */ + quit(): Promise; +} +/** + * Decodes audio into a wav file + */ +export type DecodedAudioType = { + blob: Blob; + url: string; + values: Float32Array; + audioBuffer: AudioBuffer; +}; +//# sourceMappingURL=wav_recorder.d.ts.map \ No newline at end of file diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/wav_recorder.d.ts.map b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/wav_recorder.d.ts.map new file mode 100644 index 0000000000..7954106e49 --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/wav_recorder.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"wav_recorder.d.ts","sourceRoot":"","sources":["../../lib/wav_recorder.js"],"names":[],"mappings":"AAIA;;;;;;;GAOG;AAEH;;;GAGG;AACH;IAsCE;;;;;;OAMG;IACH,yBALW,IAAI,GAAC,YAAY,GAAC,UAAU,GAAC,WAAW,GAAC,MAAM,EAAE,eACjD,MAAM,mBACN,MAAM,GACJ,OAAO,CAAC,gBAAgB,CAAC,CAqErC;IA/GD;;;;OAIG;IACH,uDAHW;QAAC,UAAU,CAAC,EAAE,MAAM,CAAC;QAAC,gBAAgB,CAAC,EAAE,OAAO,CAAC;QAAC,KAAK,CAAC,EAAE,OAAO,CAAA;KAAC,EAiC5E;IAxBC,eAAkC;IAElC,mBAA4B;IAC5B,0BAAwC;IACxC,eAAoB;IACpB,2CAAiC;IACjC,gBAAkB;IAElB,YAAkB;IAClB,eAAqB;IACrB,YAAkB;IAClB,UAAgB;IAChB,mBAAsB;IAEtB,qBAAqB;IACrB,kBAAuB;IACvB,qBAAwB;IAExB,4BAA+B;IAE/B;;;MAGC;IA+EH;;;;OAIG;IACH,qBAFa,IAAI,CAOhB;IAED;;;OAGG;IACH,iBAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,aAFa,OAAO,GAAC,QAAQ,GAAC,WAAW,CAUxC;IAED;;;;;;;OAOG;IACH,eAqBC;IAED;;;;OAIG;IACH,sCAFa,IAAI,CAmChB;IAED;;;OAGG;IACH,qBAFa,OAAO,CAAC,IAAI,CAAC,CAoBzB;IAED;;;OAGG;IACH,eAFa,OAAO,CAAC,KAAK,CAAC,eAAe,GAAG;QAAC,OAAO,EAAE,OAAO,CAAA;KAAC,CAAC,CAAC,CA8BhE;IAED;;;;;OAKG;IACH,iBAHW,MAAM,GACJ,OAAO,CAAC,IAAI,CAAC,CAkFzB;IAHC,cAAwB;IAK1B;;;;;;OAMG;IACH,8BALW,WAAW,GAAC,OAAO,GAAC,OAAO,gBAC3B,MAAM,gBACN,MAAM,GACJ,OAAO,8BAA8B,EAAE,uBAAuB,CAkB1E;IAED;;;;OAIG;IACH,SAFa,OAAO,CAAC,IAAI,CAAC,CAezB;IAED;;;;;OAKG;IACH,wBAJW,CAAC,IAAI,EAAE;QAAE,IAAI,EAAE,UAAU,CAAC;QAAC,GAAG,EAAE,UAAU,CAAA;KAAE,KAAK,GAAG,cACpD,MAAM,GACJ,OAAO,CAAC,IAAI,CAAC,CAoBzB;IATC,4BAAoC;IAWtC;;;OAGG;IACH,SAFa,OAAO,CAAC,IAAI,CAAC,CAQzB;IAED;;;OAGG;IACH,QAFa,OAAO,CAAC;QAAC,UAAU,EAAE,YAAY,CAAC;QAAC,QAAQ,EAAE,KAAK,CAAC,YAAY,CAAC,CAAA;KAAC,CAAC,CAS9E;IAED;;;;OAIG;IACH,aAHW,OAAO,GACL,OAAO,CAAC,OAAO,iBAAiB,EAAE,kBAAkB,CAAC,CAgBjE;IAED;;;OAGG;IACH,OAFa,OAAO,CAAC,OAAO,iBAAiB,EAAE,kBAAkB,CAAC,CA8BjE;IAED;;;;OAIG;IACH,QAFa,OAAO,CAAC,IAAI,CAAC,CAQzB;CACF;;;;;UA1hBa,IAAI;SACJ,MAAM;YACN,YAAY;iBACZ,WAAW"} \ No newline at end of file diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/wav_stream_player.d.ts b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/wav_stream_player.d.ts new file mode 100644 index 0000000000..91a2263fdc --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/wav_stream_player.d.ts @@ -0,0 +1,69 @@ +/** + * Plays audio streams received in raw PCM16 chunks from the browser + * @class + */ +export class WavStreamPlayer { + /** + * Creates a new WavStreamPlayer instance + * @param 
{{sampleRate?: number}} options + * @returns {WavStreamPlayer} + */ + constructor({ sampleRate }?: { + sampleRate?: number; + }); + scriptSrc: any; + sampleRate: number; + context: any; + stream: any; + analyser: any; + trackSampleOffsets: {}; + interruptedTrackIds: {}; + /** + * Connects the audio context and enables output to speakers + * @returns {Promise} + */ + connect(): Promise; + /** + * Gets the current frequency domain data from the playing track + * @param {"frequency"|"music"|"voice"} [analysisType] + * @param {number} [minDecibels] default -100 + * @param {number} [maxDecibels] default -30 + * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType} + */ + getFrequencies(analysisType?: "frequency" | "music" | "voice", minDecibels?: number, maxDecibels?: number): import("./analysis/audio_analysis.js").AudioAnalysisOutputType; + /** + * Starts audio streaming + * @private + * @returns {Promise} + */ + private _start; + /** + * Adds 16BitPCM data to the currently playing audio stream + * You can add chunks beyond the current play point and they will be queued for play + * @param {ArrayBuffer|Int16Array} arrayBuffer + * @param {string} [trackId] + * @returns {Int16Array} + */ + add16BitPCM(arrayBuffer: ArrayBuffer | Int16Array, trackId?: string): Int16Array; + /** + * Gets the offset (sample count) of the currently playing stream + * @param {boolean} [interrupt] + * @returns {{trackId: string|null, offset: number, currentTime: number}} + */ + getTrackSampleOffset(interrupt?: boolean): { + trackId: string | null; + offset: number; + currentTime: number; + }; + /** + * Strips the current stream and returns the sample offset of the audio + * @param {boolean} [interrupt] + * @returns {{trackId: string|null, offset: number, currentTime: number}} + */ + interrupt(): { + trackId: string | null; + offset: number; + currentTime: number; + }; +} +//# sourceMappingURL=wav_stream_player.d.ts.map \ No newline at end of file diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/wav_stream_player.d.ts.map b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/wav_stream_player.d.ts.map new file mode 100644 index 0000000000..500126ccd5 --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/wav_stream_player.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"wav_stream_player.d.ts","sourceRoot":"","sources":["../../lib/wav_stream_player.js"],"names":[],"mappings":"AAGA;;;GAGG;AACH;IACE;;;;OAIG;IACH,6BAHW;QAAC,UAAU,CAAC,EAAE,MAAM,CAAA;KAAC,EAW/B;IAPC,eAAmC;IACnC,mBAA4B;IAC5B,aAAmB;IACnB,YAAkB;IAClB,cAAoB;IACpB,uBAA4B;IAC5B,wBAA6B;IAG/B;;;OAGG;IACH,WAFa,OAAO,CAAC,IAAI,CAAC,CAkBzB;IAED;;;;;;OAMG;IACH,8BALW,WAAW,GAAC,OAAO,GAAC,OAAO,gBAC3B,MAAM,gBACN,MAAM,GACJ,OAAO,8BAA8B,EAAE,uBAAuB,CAkB1E;IAED;;;;OAIG;IACH,eAkBC;IAED;;;;;;OAMG;IACH,yBAJW,WAAW,GAAC,UAAU,YACtB,MAAM,GACJ,UAAU,CAqBtB;IAED;;;;OAIG;IACH,iCAHW,OAAO,GACL;QAAC,OAAO,EAAE,MAAM,GAAC,IAAI,CAAC;QAAC,MAAM,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAC,CAqBvE;IAED;;;;OAIG;IACH,aAFa;QAAC,OAAO,EAAE,MAAM,GAAC,IAAI,CAAC;QAAC,MAAM,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAC,CAIvE;CACF"} \ No newline at end of file diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/worklets/audio_processor.d.ts b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/worklets/audio_processor.d.ts new file mode 100644 index 
0000000000..8b7c8acc7b --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/worklets/audio_processor.d.ts @@ -0,0 +1,2 @@ +export const AudioProcessorSrc: any; +//# sourceMappingURL=audio_processor.d.ts.map \ No newline at end of file diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/worklets/audio_processor.d.ts.map b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/worklets/audio_processor.d.ts.map new file mode 100644 index 0000000000..d651100322 --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/worklets/audio_processor.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"audio_processor.d.ts","sourceRoot":"","sources":["../../../lib/worklets/audio_processor.js"],"names":[],"mappings":"AAqNA,oCAAqC"} \ No newline at end of file diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/worklets/stream_processor.d.ts b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/worklets/stream_processor.d.ts new file mode 100644 index 0000000000..627da71b7d --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/worklets/stream_processor.d.ts @@ -0,0 +1,3 @@ +export const StreamProcessorWorklet: "\nclass StreamProcessor extends AudioWorkletProcessor {\n constructor() {\n super();\n this.hasStarted = false;\n this.hasInterrupted = false;\n this.outputBuffers = [];\n this.bufferLength = 128;\n this.write = { buffer: new Float32Array(this.bufferLength), trackId: null };\n this.writeOffset = 0;\n this.trackSampleOffsets = {};\n this.port.onmessage = (event) => {\n if (event.data) {\n const payload = event.data;\n if (payload.event === 'write') {\n const int16Array = payload.buffer;\n const float32Array = new Float32Array(int16Array.length);\n for (let i = 0; i < int16Array.length; i++) {\n float32Array[i] = int16Array[i] / 0x8000; // Convert Int16 to Float32\n }\n this.writeData(float32Array, payload.trackId);\n } else if (\n payload.event === 'offset' ||\n payload.event === 'interrupt'\n ) {\n const requestId = payload.requestId;\n const trackId = this.write.trackId;\n const offset = this.trackSampleOffsets[trackId] || 0;\n this.port.postMessage({\n event: 'offset',\n requestId,\n trackId,\n offset,\n });\n if (payload.event === 'interrupt') {\n this.hasInterrupted = true;\n }\n } else {\n throw new Error(`Unhandled event \"${payload.event}\"`);\n }\n }\n };\n }\n\n writeData(float32Array, trackId = null) {\n let { buffer } = this.write;\n let offset = this.writeOffset;\n for (let i = 0; i < float32Array.length; i++) {\n buffer[offset++] = float32Array[i];\n if (offset >= buffer.length) {\n this.outputBuffers.push(this.write);\n this.write = { buffer: new Float32Array(this.bufferLength), trackId };\n buffer = this.write.buffer;\n offset = 0;\n }\n }\n this.writeOffset = offset;\n return true;\n }\n\n process(inputs, outputs, parameters) {\n const output = outputs[0];\n const outputChannelData = output[0];\n const outputBuffers = this.outputBuffers;\n if (this.hasInterrupted) {\n this.port.postMessage({ event: 'stop' });\n return false;\n } else if (outputBuffers.length) {\n this.hasStarted = true;\n const { buffer, trackId } = outputBuffers.shift();\n for (let i = 0; i < outputChannelData.length; i++) {\n outputChannelData[i] = buffer[i] || 0;\n }\n if (trackId) {\n 
this.trackSampleOffsets[trackId] =\n this.trackSampleOffsets[trackId] || 0;\n this.trackSampleOffsets[trackId] += buffer.length;\n }\n return true;\n } else if (this.hasStarted) {\n this.port.postMessage({ event: 'stop' });\n return false;\n } else {\n return true;\n }\n }\n}\n\nregisterProcessor('stream_processor', StreamProcessor);\n"; +export const StreamProcessorSrc: any; +//# sourceMappingURL=stream_processor.d.ts.map \ No newline at end of file diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/worklets/stream_processor.d.ts.map b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/worklets/stream_processor.d.ts.map new file mode 100644 index 0000000000..c372e0b2c4 --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/worklets/stream_processor.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"stream_processor.d.ts","sourceRoot":"","sources":["../../../lib/worklets/stream_processor.js"],"names":[],"mappings":"AAAA,q4FAyFE;AAMF,qCAAsC"} \ No newline at end of file diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/index.js b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/index.js new file mode 100644 index 0000000000..712389428b --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/index.js @@ -0,0 +1,6 @@ +import { WavPacker } from './lib/wav_packer.js'; +import { AudioAnalysis } from './lib/analysis/audio_analysis.js'; +import { WavStreamPlayer } from './lib/wav_stream_player.js'; +import { WavRecorder } from './lib/wav_recorder.js'; + +export { AudioAnalysis, WavPacker, WavStreamPlayer, WavRecorder }; diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/lib/analysis/audio_analysis.js b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/lib/analysis/audio_analysis.js new file mode 100644 index 0000000000..4af34d54c4 --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/lib/analysis/audio_analysis.js @@ -0,0 +1,203 @@ +import { + noteFrequencies, + noteFrequencyLabels, + voiceFrequencies, + voiceFrequencyLabels, +} from './constants.js'; + +/** + * Output of AudioAnalysis for the frequency domain of the audio + * @typedef {Object} AudioAnalysisOutputType + * @property {Float32Array} values Amplitude of this frequency between {0, 1} inclusive + * @property {number[]} frequencies Raw frequency bucket values + * @property {string[]} labels Labels for the frequency bucket values + */ + +/** + * Analyzes audio for visual output + * @class + */ +export class AudioAnalysis { + /** + * Retrieves frequency domain data from an AnalyserNode adjusted to a decibel range + * returns human-readable formatting and labels + * @param {AnalyserNode} analyser + * @param {number} sampleRate + * @param {Float32Array} [fftResult] + * @param {"frequency"|"music"|"voice"} [analysisType] + * @param {number} [minDecibels] default -100 + * @param {number} [maxDecibels] default -30 + * @returns {AudioAnalysisOutputType} + */ + static getFrequencies( + analyser, + sampleRate, + fftResult, + analysisType = 'frequency', + minDecibels = -100, + maxDecibels = -30, + ) { + if (!fftResult) { + fftResult = new Float32Array(analyser.frequencyBinCount); + analyser.getFloatFrequencyData(fftResult); + } + const nyquistFrequency = sampleRate / 2; + 
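+    // FFT bins are spaced linearly from 0 Hz up to the Nyquist frequency,
+    // so each bin covers (sampleRate / 2) / fftResult.length Hz.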
const frequencyStep = (1 / fftResult.length) * nyquistFrequency; + let outputValues; + let frequencies; + let labels; + if (analysisType === 'music' || analysisType === 'voice') { + const useFrequencies = + analysisType === 'voice' ? voiceFrequencies : noteFrequencies; + const aggregateOutput = Array(useFrequencies.length).fill(minDecibels); + for (let i = 0; i < fftResult.length; i++) { + const frequency = i * frequencyStep; + const amplitude = fftResult[i]; + for (let n = useFrequencies.length - 1; n >= 0; n--) { + if (frequency > useFrequencies[n]) { + aggregateOutput[n] = Math.max(aggregateOutput[n], amplitude); + break; + } + } + } + outputValues = aggregateOutput; + frequencies = + analysisType === 'voice' ? voiceFrequencies : noteFrequencies; + labels = + analysisType === 'voice' ? voiceFrequencyLabels : noteFrequencyLabels; + } else { + outputValues = Array.from(fftResult); + frequencies = outputValues.map((_, i) => frequencyStep * i); + labels = frequencies.map((f) => `${f.toFixed(2)} Hz`); + } + // We normalize to {0, 1} + const normalizedOutput = outputValues.map((v) => { + return Math.max( + 0, + Math.min((v - minDecibels) / (maxDecibels - minDecibels), 1), + ); + }); + const values = new Float32Array(normalizedOutput); + return { + values, + frequencies, + labels, + }; + } + + /** + * Creates a new AudioAnalysis instance for an HTMLAudioElement + * @param {HTMLAudioElement} audioElement + * @param {AudioBuffer|null} [audioBuffer] If provided, will cache all frequency domain data from the buffer + * @returns {AudioAnalysis} + */ + constructor(audioElement, audioBuffer = null) { + this.fftResults = []; + if (audioBuffer) { + /** + * Modified from + * https://stackoverflow.com/questions/75063715/using-the-web-audio-api-to-analyze-a-song-without-playing + * + * We do this to populate FFT values for the audio if provided an `audioBuffer` + * The reason to do this is that Safari fails when using `createMediaElementSource` + * This has a non-zero RAM cost so we only opt-in to run it on Safari, Chrome is better + */ + const { length, sampleRate } = audioBuffer; + const offlineAudioContext = new OfflineAudioContext({ + length, + sampleRate, + }); + const source = offlineAudioContext.createBufferSource(); + source.buffer = audioBuffer; + const analyser = offlineAudioContext.createAnalyser(); + analyser.fftSize = 8192; + analyser.smoothingTimeConstant = 0.1; + source.connect(analyser); + // limit is :: 128 / sampleRate; + // but we just want 60fps - cuts ~1s from 6MB to 1MB of RAM + const renderQuantumInSeconds = 1 / 60; + const durationInSeconds = length / sampleRate; + const analyze = (index) => { + const suspendTime = renderQuantumInSeconds * index; + if (suspendTime < durationInSeconds) { + offlineAudioContext.suspend(suspendTime).then(() => { + const fftResult = new Float32Array(analyser.frequencyBinCount); + analyser.getFloatFrequencyData(fftResult); + this.fftResults.push(fftResult); + analyze(index + 1); + }); + } + if (index === 1) { + offlineAudioContext.startRendering(); + } else { + offlineAudioContext.resume(); + } + }; + source.start(0); + analyze(1); + this.audio = audioElement; + this.context = offlineAudioContext; + this.analyser = analyser; + this.sampleRate = sampleRate; + this.audioBuffer = audioBuffer; + } else { + const audioContext = new AudioContext(); + const track = audioContext.createMediaElementSource(audioElement); + const analyser = audioContext.createAnalyser(); + analyser.fftSize = 8192; + analyser.smoothingTimeConstant = 0.1; + track.connect(analyser); + 
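+      // Keep the element audible by routing the analysed signal on to the speakers.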
analyser.connect(audioContext.destination); + this.audio = audioElement; + this.context = audioContext; + this.analyser = analyser; + this.sampleRate = this.context.sampleRate; + this.audioBuffer = null; + } + } + + /** + * Gets the current frequency domain data from the playing audio track + * @param {"frequency"|"music"|"voice"} [analysisType] + * @param {number} [minDecibels] default -100 + * @param {number} [maxDecibels] default -30 + * @returns {AudioAnalysisOutputType} + */ + getFrequencies( + analysisType = 'frequency', + minDecibels = -100, + maxDecibels = -30, + ) { + let fftResult = null; + if (this.audioBuffer && this.fftResults.length) { + const pct = this.audio.currentTime / this.audio.duration; + const index = Math.min( + (pct * this.fftResults.length) | 0, + this.fftResults.length - 1, + ); + fftResult = this.fftResults[index]; + } + return AudioAnalysis.getFrequencies( + this.analyser, + this.sampleRate, + fftResult, + analysisType, + minDecibels, + maxDecibels, + ); + } + + /** + * Resume the internal AudioContext if it was suspended due to the lack of + * user interaction when the AudioAnalysis was instantiated. + * @returns {Promise} + */ + async resumeIfSuspended() { + if (this.context.state === 'suspended') { + await this.context.resume(); + } + return true; + } +} + +globalThis.AudioAnalysis = AudioAnalysis; diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/lib/analysis/constants.js b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/lib/analysis/constants.js new file mode 100644 index 0000000000..f14da38e62 --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/lib/analysis/constants.js @@ -0,0 +1,60 @@ +/** + * Constants for help with visualization + * Helps map frequency ranges from Fast Fourier Transform + * to human-interpretable ranges, notably music ranges and + * human vocal ranges. 
+ */ + +// Eighth octave frequencies +const octave8Frequencies = [ + 4186.01, 4434.92, 4698.63, 4978.03, 5274.04, 5587.65, 5919.91, 6271.93, + 6644.88, 7040.0, 7458.62, 7902.13, +]; + +// Labels for each of the above frequencies +const octave8FrequencyLabels = [ + 'C', + 'C#', + 'D', + 'D#', + 'E', + 'F', + 'F#', + 'G', + 'G#', + 'A', + 'A#', + 'B', +]; + +/** + * All note frequencies from 1st to 8th octave + * in format "A#8" (A#, 8th octave) + */ +export const noteFrequencies = []; +export const noteFrequencyLabels = []; +for (let i = 1; i <= 8; i++) { + for (let f = 0; f < octave8Frequencies.length; f++) { + const freq = octave8Frequencies[f]; + noteFrequencies.push(freq / Math.pow(2, 8 - i)); + noteFrequencyLabels.push(octave8FrequencyLabels[f] + i); + } +} + +/** + * Subset of the note frequencies between 32 and 2000 Hz + * 6 octave range: C1 to B6 + */ +const voiceFrequencyRange = [32.0, 2000.0]; +export const voiceFrequencies = noteFrequencies.filter((_, i) => { + return ( + noteFrequencies[i] > voiceFrequencyRange[0] && + noteFrequencies[i] < voiceFrequencyRange[1] + ); +}); +export const voiceFrequencyLabels = noteFrequencyLabels.filter((_, i) => { + return ( + noteFrequencies[i] > voiceFrequencyRange[0] && + noteFrequencies[i] < voiceFrequencyRange[1] + ); +}); diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/lib/wav_packer.js b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/lib/wav_packer.js new file mode 100644 index 0000000000..7146b7fdeb --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/lib/wav_packer.js @@ -0,0 +1,113 @@ +/** + * Raw wav audio file contents + * @typedef {Object} WavPackerAudioType + * @property {Blob} blob + * @property {string} url + * @property {number} channelCount + * @property {number} sampleRate + * @property {number} duration + */ + +/** + * Utility class for assembling PCM16 "audio/wav" data + * @class + */ +export class WavPacker { + /** + * Converts Float32Array of amplitude data to ArrayBuffer in Int16Array format + * @param {Float32Array} float32Array + * @returns {ArrayBuffer} + */ + static floatTo16BitPCM(float32Array) { + const buffer = new ArrayBuffer(float32Array.length * 2); + const view = new DataView(buffer); + let offset = 0; + for (let i = 0; i < float32Array.length; i++, offset += 2) { + let s = Math.max(-1, Math.min(1, float32Array[i])); + view.setInt16(offset, s < 0 ? 
s * 0x8000 : s * 0x7fff, true); + } + return buffer; + } + + /** + * Concatenates two ArrayBuffers + * @param {ArrayBuffer} leftBuffer + * @param {ArrayBuffer} rightBuffer + * @returns {ArrayBuffer} + */ + static mergeBuffers(leftBuffer, rightBuffer) { + const tmpArray = new Uint8Array( + leftBuffer.byteLength + rightBuffer.byteLength + ); + tmpArray.set(new Uint8Array(leftBuffer), 0); + tmpArray.set(new Uint8Array(rightBuffer), leftBuffer.byteLength); + return tmpArray.buffer; + } + + /** + * Packs data into an Int16 format + * @private + * @param {number} size 0 = 1x Int16, 1 = 2x Int16 + * @param {number} arg value to pack + * @returns + */ + _packData(size, arg) { + return [ + new Uint8Array([arg, arg >> 8]), + new Uint8Array([arg, arg >> 8, arg >> 16, arg >> 24]), + ][size]; + } + + /** + * Packs audio into "audio/wav" Blob + * @param {number} sampleRate + * @param {{bitsPerSample: number, channels: Array, data: Int16Array}} audio + * @returns {WavPackerAudioType} + */ + pack(sampleRate, audio) { + if (!audio?.bitsPerSample) { + throw new Error(`Missing "bitsPerSample"`); + } else if (!audio?.channels) { + throw new Error(`Missing "channels"`); + } else if (!audio?.data) { + throw new Error(`Missing "data"`); + } + const { bitsPerSample, channels, data } = audio; + const output = [ + // Header + 'RIFF', + this._packData( + 1, + 4 + (8 + 24) /* chunk 1 length */ + (8 + 8) /* chunk 2 length */ + ), // Length + 'WAVE', + // chunk 1 + 'fmt ', // Sub-chunk identifier + this._packData(1, 16), // Chunk length + this._packData(0, 1), // Audio format (1 is linear quantization) + this._packData(0, channels.length), + this._packData(1, sampleRate), + this._packData(1, (sampleRate * channels.length * bitsPerSample) / 8), // Byte rate + this._packData(0, (channels.length * bitsPerSample) / 8), + this._packData(0, bitsPerSample), + // chunk 2 + 'data', // Sub-chunk identifier + this._packData( + 1, + (channels[0].length * channels.length * bitsPerSample) / 8 + ), // Chunk length + data, + ]; + const blob = new Blob(output, { type: 'audio/mpeg' }); + const url = URL.createObjectURL(blob); + return { + blob, + url, + channelCount: channels.length, + sampleRate, + duration: data.byteLength / (channels.length * sampleRate * 2), + }; + } +} + +globalThis.WavPacker = WavPacker; diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/lib/wav_recorder.js b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/lib/wav_recorder.js new file mode 100644 index 0000000000..a4f1d045bf --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/lib/wav_recorder.js @@ -0,0 +1,548 @@ +import { AudioProcessorSrc } from './worklets/audio_processor.js'; +import { AudioAnalysis } from './analysis/audio_analysis.js'; +import { WavPacker } from './wav_packer.js'; + +/** + * Decodes audio into a wav file + * @typedef {Object} DecodedAudioType + * @property {Blob} blob + * @property {string} url + * @property {Float32Array} values + * @property {AudioBuffer} audioBuffer + */ + +/** + * Records live stream of user audio as PCM16 "audio/wav" data + * @class + */ +export class WavRecorder { + /** + * Create a new WavRecorder instance + * @param {{sampleRate?: number, outputToSpeakers?: boolean, debug?: boolean}} [options] + * @returns {WavRecorder} + */ + constructor({ + sampleRate = 44100, + outputToSpeakers = false, + debug = false, + } = {}) { + // Script source + this.scriptSrc = AudioProcessorSrc; + // Config + 
this.sampleRate = sampleRate; + this.outputToSpeakers = outputToSpeakers; + this.debug = !!debug; + this._deviceChangeCallback = null; + this._devices = []; + // State variables + this.stream = null; + this.processor = null; + this.source = null; + this.node = null; + this.recording = false; + // Event handling with AudioWorklet + this._lastEventId = 0; + this.eventReceipts = {}; + this.eventTimeout = 5000; + // Process chunks of audio + this._chunkProcessor = () => {}; + this._chunkProcessorSize = void 0; + this._chunkProcessorBuffer = { + raw: new ArrayBuffer(0), + mono: new ArrayBuffer(0), + }; + } + + /** + * Decodes audio data from multiple formats to a Blob, url, Float32Array and AudioBuffer + * @param {Blob|Float32Array|Int16Array|ArrayBuffer|number[]} audioData + * @param {number} sampleRate + * @param {number} fromSampleRate + * @returns {Promise} + */ + static async decode(audioData, sampleRate = 44100, fromSampleRate = -1) { + const context = new AudioContext({ sampleRate }); + let arrayBuffer; + let blob; + if (audioData instanceof Blob) { + if (fromSampleRate !== -1) { + throw new Error( + `Can not specify "fromSampleRate" when reading from Blob`, + ); + } + blob = audioData; + arrayBuffer = await blob.arrayBuffer(); + } else if (audioData instanceof ArrayBuffer) { + if (fromSampleRate !== -1) { + throw new Error( + `Can not specify "fromSampleRate" when reading from ArrayBuffer`, + ); + } + arrayBuffer = audioData; + blob = new Blob([arrayBuffer], { type: 'audio/wav' }); + } else { + let float32Array; + let data; + if (audioData instanceof Int16Array) { + data = audioData; + float32Array = new Float32Array(audioData.length); + for (let i = 0; i < audioData.length; i++) { + float32Array[i] = audioData[i] / 0x8000; + } + } else if (audioData instanceof Float32Array) { + float32Array = audioData; + } else if (audioData instanceof Array) { + float32Array = new Float32Array(audioData); + } else { + throw new Error( + `"audioData" must be one of: Blob, Float32Arrray, Int16Array, ArrayBuffer, Array`, + ); + } + if (fromSampleRate === -1) { + throw new Error( + `Must specify "fromSampleRate" when reading from Float32Array, In16Array or Array`, + ); + } else if (fromSampleRate < 3000) { + throw new Error(`Minimum "fromSampleRate" is 3000 (3kHz)`); + } + if (!data) { + data = WavPacker.floatTo16BitPCM(float32Array); + } + const audio = { + bitsPerSample: 16, + channels: [float32Array], + data, + }; + const packer = new WavPacker(); + const result = packer.pack(fromSampleRate, audio); + blob = result.blob; + arrayBuffer = await blob.arrayBuffer(); + } + const audioBuffer = await context.decodeAudioData(arrayBuffer); + const values = audioBuffer.getChannelData(0); + const url = URL.createObjectURL(blob); + return { + blob, + url, + values, + audioBuffer, + }; + } + + /** + * Logs data in debug mode + * @param {...any} arguments + * @returns {true} + */ + log() { + if (this.debug) { + this.log(...arguments); + } + return true; + } + + /** + * Retrieves the current sampleRate for the recorder + * @returns {number} + */ + getSampleRate() { + return this.sampleRate; + } + + /** + * Retrieves the current status of the recording + * @returns {"ended"|"paused"|"recording"} + */ + getStatus() { + if (!this.processor) { + return 'ended'; + } else if (!this.recording) { + return 'paused'; + } else { + return 'recording'; + } + } + + /** + * Sends an event to the AudioWorklet + * @private + * @param {string} name + * @param {{[key: string]: any}} data + * @param {AudioWorkletNode} [_processor] + * 
@returns {Promise<{[key: string]: any}>} + */ + async _event(name, data = {}, _processor = null) { + _processor = _processor || this.processor; + if (!_processor) { + throw new Error('Can not send events without recording first'); + } + const message = { + event: name, + id: this._lastEventId++, + data, + }; + _processor.port.postMessage(message); + const t0 = new Date().valueOf(); + while (!this.eventReceipts[message.id]) { + if (new Date().valueOf() - t0 > this.eventTimeout) { + throw new Error(`Timeout waiting for "${name}" event`); + } + await new Promise((res) => setTimeout(() => res(true), 1)); + } + const payload = this.eventReceipts[message.id]; + delete this.eventReceipts[message.id]; + return payload; + } + + /** + * Sets device change callback, remove if callback provided is `null` + * @param {(Array): void|null} callback + * @returns {true} + */ + listenForDeviceChange(callback) { + if (callback === null && this._deviceChangeCallback) { + navigator.mediaDevices.removeEventListener( + 'devicechange', + this._deviceChangeCallback, + ); + this._deviceChangeCallback = null; + } else if (callback !== null) { + // Basically a debounce; we only want this called once when devices change + // And we only want the most recent callback() to be executed + // if a few are operating at the same time + let lastId = 0; + let lastDevices = []; + const serializeDevices = (devices) => + devices + .map((d) => d.deviceId) + .sort() + .join(','); + const cb = async () => { + let id = ++lastId; + const devices = await this.listDevices(); + if (id === lastId) { + if (serializeDevices(lastDevices) !== serializeDevices(devices)) { + lastDevices = devices; + callback(devices.slice()); + } + } + }; + navigator.mediaDevices.addEventListener('devicechange', cb); + cb(); + this._deviceChangeCallback = cb; + } + return true; + } + + /** + * Manually request permission to use the microphone + * @returns {Promise} + */ + async requestPermission() { + const permissionStatus = await navigator.permissions.query({ + name: 'microphone', + }); + if (permissionStatus.state === 'denied') { + window.alert('You must grant microphone access to use this feature.'); + } else if (permissionStatus.state === 'prompt') { + try { + const stream = await navigator.mediaDevices.getUserMedia({ + audio: true, + }); + const tracks = stream.getTracks(); + tracks.forEach((track) => track.stop()); + } catch (e) { + window.alert('You must grant microphone access to use this feature.'); + } + } + return true; + } + + /** + * List all eligible devices for recording, will request permission to use microphone + * @returns {Promise>} + */ + async listDevices() { + if ( + !navigator.mediaDevices || + !('enumerateDevices' in navigator.mediaDevices) + ) { + throw new Error('Could not request user devices'); + } + await this.requestPermission(); + const devices = await navigator.mediaDevices.enumerateDevices(); + const audioDevices = devices.filter( + (device) => device.kind === 'audioinput', + ); + const defaultDeviceIndex = audioDevices.findIndex( + (device) => device.deviceId === 'default', + ); + const deviceList = []; + if (defaultDeviceIndex !== -1) { + let defaultDevice = audioDevices.splice(defaultDeviceIndex, 1)[0]; + let existingIndex = audioDevices.findIndex( + (device) => device.groupId === defaultDevice.groupId, + ); + if (existingIndex !== -1) { + defaultDevice = audioDevices.splice(existingIndex, 1)[0]; + } + defaultDevice.default = true; + deviceList.push(defaultDevice); + } + return deviceList.concat(audioDevices); + } + + /** + * 
Begins a recording session and requests microphone permissions if not already granted + * Microphone recording indicator will appear on browser tab but status will be "paused" + * @param {string} [deviceId] if no device provided, default device will be used + * @returns {Promise} + */ + async begin(deviceId) { + if (this.processor) { + throw new Error( + `Already connected: please call .end() to start a new session`, + ); + } + + if ( + !navigator.mediaDevices || + !('getUserMedia' in navigator.mediaDevices) + ) { + throw new Error('Could not request user media'); + } + try { + const config = { audio: true }; + if (deviceId) { + config.audio = { deviceId: { exact: deviceId } }; + } + this.stream = await navigator.mediaDevices.getUserMedia(config); + } catch (err) { + throw new Error('Could not start media stream'); + } + + const context = new AudioContext({ sampleRate: this.sampleRate }); + const source = context.createMediaStreamSource(this.stream); + // Load and execute the module script. + try { + await context.audioWorklet.addModule(this.scriptSrc); + } catch (e) { + console.error(e); + throw new Error(`Could not add audioWorklet module: ${this.scriptSrc}`); + } + const processor = new AudioWorkletNode(context, 'audio_processor'); + processor.port.onmessage = (e) => { + const { event, id, data } = e.data; + if (event === 'receipt') { + this.eventReceipts[id] = data; + } else if (event === 'chunk') { + if (this._chunkProcessorSize) { + const buffer = this._chunkProcessorBuffer; + this._chunkProcessorBuffer = { + raw: WavPacker.mergeBuffers(buffer.raw, data.raw), + mono: WavPacker.mergeBuffers(buffer.mono, data.mono), + }; + if ( + this._chunkProcessorBuffer.mono.byteLength >= + this._chunkProcessorSize + ) { + this._chunkProcessor(this._chunkProcessorBuffer); + this._chunkProcessorBuffer = { + raw: new ArrayBuffer(0), + mono: new ArrayBuffer(0), + }; + } + } else { + this._chunkProcessor(data); + } + } + }; + + const node = source.connect(processor); + const analyser = context.createAnalyser(); + analyser.fftSize = 8192; + analyser.smoothingTimeConstant = 0.1; + node.connect(analyser); + if (this.outputToSpeakers) { + // eslint-disable-next-line no-console + console.warn( + 'Warning: Output to speakers may affect sound quality,\n' + + 'especially due to system audio feedback preventative measures.\n' + + 'use only for debugging', + ); + analyser.connect(context.destination); + } + + this.source = source; + this.node = node; + this.analyser = analyser; + this.processor = processor; + return true; + } + + /** + * Gets the current frequency domain data from the recording track + * @param {"frequency"|"music"|"voice"} [analysisType] + * @param {number} [minDecibels] default -100 + * @param {number} [maxDecibels] default -30 + * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType} + */ + getFrequencies( + analysisType = 'frequency', + minDecibels = -100, + maxDecibels = -30, + ) { + if (!this.processor) { + throw new Error('Session ended: please call .begin() first'); + } + return AudioAnalysis.getFrequencies( + this.analyser, + this.sampleRate, + null, + analysisType, + minDecibels, + maxDecibels, + ); + } + + /** + * Pauses the recording + * Keeps microphone stream open but halts storage of audio + * @returns {Promise} + */ + async pause() { + if (!this.processor) { + throw new Error('Session ended: please call .begin() first'); + } else if (!this.recording) { + throw new Error('Already paused: please call .record() first'); + } + if 
(this._chunkProcessorBuffer.raw.byteLength) { + this._chunkProcessor(this._chunkProcessorBuffer); + } + this.log('Pausing ...'); + await this._event('stop'); + this.recording = false; + return true; + } + + /** + * Start recording stream and storing to memory from the connected audio source + * @param {(data: { mono: Int16Array; raw: Int16Array }) => any} [chunkProcessor] + * @param {number} [chunkSize] chunkProcessor will not be triggered until this size threshold met in mono audio + * @returns {Promise} + */ + async record(chunkProcessor = () => {}, chunkSize = 8192) { + if (!this.processor) { + throw new Error('Session ended: please call .begin() first'); + } else if (this.recording) { + throw new Error('Already recording: please call .pause() first'); + } else if (typeof chunkProcessor !== 'function') { + throw new Error(`chunkProcessor must be a function`); + } + this._chunkProcessor = chunkProcessor; + this._chunkProcessorSize = chunkSize; + this._chunkProcessorBuffer = { + raw: new ArrayBuffer(0), + mono: new ArrayBuffer(0), + }; + this.log('Recording ...'); + await this._event('start'); + this.recording = true; + return true; + } + + /** + * Clears the audio buffer, empties stored recording + * @returns {Promise} + */ + async clear() { + if (!this.processor) { + throw new Error('Session ended: please call .begin() first'); + } + await this._event('clear'); + return true; + } + + /** + * Reads the current audio stream data + * @returns {Promise<{meanValues: Float32Array, channels: Array}>} + */ + async read() { + if (!this.processor) { + throw new Error('Session ended: please call .begin() first'); + } + this.log('Reading ...'); + const result = await this._event('read'); + return result; + } + + /** + * Saves the current audio stream to a file + * @param {boolean} [force] Force saving while still recording + * @returns {Promise} + */ + async save(force = false) { + if (!this.processor) { + throw new Error('Session ended: please call .begin() first'); + } + if (!force && this.recording) { + throw new Error( + 'Currently recording: please call .pause() first, or call .save(true) to force', + ); + } + this.log('Exporting ...'); + const exportData = await this._event('export'); + const packer = new WavPacker(); + const result = packer.pack(this.sampleRate, exportData.audio); + return result; + } + + /** + * Ends the current recording session and saves the result + * @returns {Promise} + */ + async end() { + if (!this.processor) { + throw new Error('Session ended: please call .begin() first'); + } + + const _processor = this.processor; + + this.log('Stopping ...'); + await this._event('stop'); + this.recording = false; + const tracks = this.stream.getTracks(); + tracks.forEach((track) => track.stop()); + + this.log('Exporting ...'); + const exportData = await this._event('export', {}, _processor); + + this.processor.disconnect(); + this.source.disconnect(); + this.node.disconnect(); + this.analyser.disconnect(); + this.stream = null; + this.processor = null; + this.source = null; + this.node = null; + + const packer = new WavPacker(); + const result = packer.pack(this.sampleRate, exportData.audio); + return result; + } + + /** + * Performs a full cleanup of WavRecorder instance + * Stops actively listening via microphone and removes existing listeners + * @returns {Promise} + */ + async quit() { + this.listenForDeviceChange(null); + if (this.processor) { + await this.end(); + } + return true; + } +} + +globalThis.WavRecorder = WavRecorder; diff --git 
a/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/lib/wav_stream_player.js b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/lib/wav_stream_player.js new file mode 100644 index 0000000000..500eff6c5c --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/lib/wav_stream_player.js @@ -0,0 +1,160 @@ +import { StreamProcessorSrc } from './worklets/stream_processor.js'; +import { AudioAnalysis } from './analysis/audio_analysis.js'; + +/** + * Plays audio streams received in raw PCM16 chunks from the browser + * @class + */ +export class WavStreamPlayer { + /** + * Creates a new WavStreamPlayer instance + * @param {{sampleRate?: number}} options + * @returns {WavStreamPlayer} + */ + constructor({ sampleRate = 44100 } = {}) { + this.scriptSrc = StreamProcessorSrc; + this.sampleRate = sampleRate; + this.context = null; + this.stream = null; + this.analyser = null; + this.trackSampleOffsets = {}; + this.interruptedTrackIds = {}; + } + + /** + * Connects the audio context and enables output to speakers + * @returns {Promise} + */ + async connect() { + this.context = new AudioContext({ sampleRate: this.sampleRate }); + if (this.context.state === 'suspended') { + await this.context.resume(); + } + try { + await this.context.audioWorklet.addModule(this.scriptSrc); + } catch (e) { + console.error(e); + throw new Error(`Could not add audioWorklet module: ${this.scriptSrc}`); + } + const analyser = this.context.createAnalyser(); + analyser.fftSize = 8192; + analyser.smoothingTimeConstant = 0.1; + this.analyser = analyser; + return true; + } + + /** + * Gets the current frequency domain data from the playing track + * @param {"frequency"|"music"|"voice"} [analysisType] + * @param {number} [minDecibels] default -100 + * @param {number} [maxDecibels] default -30 + * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType} + */ + getFrequencies( + analysisType = 'frequency', + minDecibels = -100, + maxDecibels = -30 + ) { + if (!this.analyser) { + throw new Error('Not connected, please call .connect() first'); + } + return AudioAnalysis.getFrequencies( + this.analyser, + this.sampleRate, + null, + analysisType, + minDecibels, + maxDecibels + ); + } + + /** + * Starts audio streaming + * @private + * @returns {Promise} + */ + _start() { + const streamNode = new AudioWorkletNode(this.context, 'stream_processor'); + streamNode.connect(this.context.destination); + streamNode.port.onmessage = (e) => { + const { event } = e.data; + if (event === 'stop') { + streamNode.disconnect(); + this.stream = null; + } else if (event === 'offset') { + const { requestId, trackId, offset } = e.data; + const currentTime = offset / this.sampleRate; + this.trackSampleOffsets[requestId] = { trackId, offset, currentTime }; + } + }; + this.analyser.disconnect(); + streamNode.connect(this.analyser); + this.stream = streamNode; + return true; + } + + /** + * Adds 16BitPCM data to the currently playing audio stream + * You can add chunks beyond the current play point and they will be queued for play + * @param {ArrayBuffer|Int16Array} arrayBuffer + * @param {string} [trackId] + * @returns {Int16Array} + */ + add16BitPCM(arrayBuffer, trackId = 'default') { + if (typeof trackId !== 'string') { + throw new Error(`trackId must be a string`); + } else if (this.interruptedTrackIds[trackId]) { + return; + } + if (!this.stream) { + this._start(); + } + let buffer; + if (arrayBuffer instanceof Int16Array) { + buffer = 
arrayBuffer; + } else if (arrayBuffer instanceof ArrayBuffer) { + buffer = new Int16Array(arrayBuffer); + } else { + throw new Error(`argument must be Int16Array or ArrayBuffer`); + } + this.stream.port.postMessage({ event: 'write', buffer, trackId }); + return buffer; + } + + /** + * Gets the offset (sample count) of the currently playing stream + * @param {boolean} [interrupt] + * @returns {{trackId: string|null, offset: number, currentTime: number}} + */ + async getTrackSampleOffset(interrupt = false) { + if (!this.stream) { + return null; + } + const requestId = crypto.randomUUID(); + this.stream.port.postMessage({ + event: interrupt ? 'interrupt' : 'offset', + requestId, + }); + let trackSampleOffset; + while (!trackSampleOffset) { + trackSampleOffset = this.trackSampleOffsets[requestId]; + await new Promise((r) => setTimeout(() => r(), 1)); + } + const { trackId } = trackSampleOffset; + if (interrupt && trackId) { + this.interruptedTrackIds[trackId] = true; + } + return trackSampleOffset; + } + + /** + * Strips the current stream and returns the sample offset of the audio + * @param {boolean} [interrupt] + * @returns {{trackId: string|null, offset: number, currentTime: number}} + */ + async interrupt() { + return this.getTrackSampleOffset(true); + } +} + +globalThis.WavStreamPlayer = WavStreamPlayer; diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/lib/worklets/audio_processor.js b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/lib/worklets/audio_processor.js new file mode 100644 index 0000000000..61dd7ec9ce --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/lib/worklets/audio_processor.js @@ -0,0 +1,214 @@ +const AudioProcessorWorklet = ` +class AudioProcessor extends AudioWorkletProcessor { + + constructor() { + super(); + this.port.onmessage = this.receive.bind(this); + this.initialize(); + } + + initialize() { + this.foundAudio = false; + this.recording = false; + this.chunks = []; + } + + /** + * Concatenates sampled chunks into channels + * Format is chunk[Left[], Right[]] + */ + readChannelData(chunks, channel = -1, maxChannels = 9) { + let channelLimit; + if (channel !== -1) { + if (chunks[0] && chunks[0].length - 1 < channel) { + throw new Error( + \`Channel \${channel} out of range: max \${chunks[0].length}\` + ); + } + channelLimit = channel + 1; + } else { + channel = 0; + channelLimit = Math.min(chunks[0] ? chunks[0].length : 1, maxChannels); + } + const channels = []; + for (let n = channel; n < channelLimit; n++) { + const length = chunks.reduce((sum, chunk) => { + return sum + chunk[n].length; + }, 0); + const buffers = chunks.map((chunk) => chunk[n]); + const result = new Float32Array(length); + let offset = 0; + for (let i = 0; i < buffers.length; i++) { + result.set(buffers[i], offset); + offset += buffers[i].length; + } + channels[n] = result; + } + return channels; + } + + /** + * Combines parallel audio data into correct format, + * channels[Left[], Right[]] to float32Array[LRLRLRLR...] 
+ */ + formatAudioData(channels) { + if (channels.length === 1) { + // Simple case is only one channel + const float32Array = channels[0].slice(); + const meanValues = channels[0].slice(); + return { float32Array, meanValues }; + } else { + const float32Array = new Float32Array( + channels[0].length * channels.length + ); + const meanValues = new Float32Array(channels[0].length); + for (let i = 0; i < channels[0].length; i++) { + const offset = i * channels.length; + let meanValue = 0; + for (let n = 0; n < channels.length; n++) { + float32Array[offset + n] = channels[n][i]; + meanValue += channels[n][i]; + } + meanValues[i] = meanValue / channels.length; + } + return { float32Array, meanValues }; + } + } + + /** + * Converts 32-bit float data to 16-bit integers + */ + floatTo16BitPCM(float32Array) { + const buffer = new ArrayBuffer(float32Array.length * 2); + const view = new DataView(buffer); + let offset = 0; + for (let i = 0; i < float32Array.length; i++, offset += 2) { + let s = Math.max(-1, Math.min(1, float32Array[i])); + view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true); + } + return buffer; + } + + /** + * Retrieves the most recent amplitude values from the audio stream + * @param {number} channel + */ + getValues(channel = -1) { + const channels = this.readChannelData(this.chunks, channel); + const { meanValues } = this.formatAudioData(channels); + return { meanValues, channels }; + } + + /** + * Exports chunks as an audio/wav file + */ + export() { + const channels = this.readChannelData(this.chunks); + const { float32Array, meanValues } = this.formatAudioData(channels); + const audioData = this.floatTo16BitPCM(float32Array); + return { + meanValues: meanValues, + audio: { + bitsPerSample: 16, + channels: channels, + data: audioData, + }, + }; + } + + receive(e) { + const { event, id } = e.data; + let receiptData = {}; + switch (event) { + case 'start': + this.recording = true; + break; + case 'stop': + this.recording = false; + break; + case 'clear': + this.initialize(); + break; + case 'export': + receiptData = this.export(); + break; + case 'read': + receiptData = this.getValues(); + break; + default: + break; + } + // Always send back receipt + this.port.postMessage({ event: 'receipt', id, data: receiptData }); + } + + sendChunk(chunk) { + const channels = this.readChannelData([chunk]); + const { float32Array, meanValues } = this.formatAudioData(channels); + const rawAudioData = this.floatTo16BitPCM(float32Array); + const monoAudioData = this.floatTo16BitPCM(meanValues); + this.port.postMessage({ + event: 'chunk', + data: { + mono: monoAudioData, + raw: rawAudioData, + }, + }); + } + + process(inputList, outputList, parameters) { + // Copy input to output (e.g. 
speakers) + // Note that this creates choppy sounds with Mac products + const sourceLimit = Math.min(inputList.length, outputList.length); + for (let inputNum = 0; inputNum < sourceLimit; inputNum++) { + const input = inputList[inputNum]; + const output = outputList[inputNum]; + const channelCount = Math.min(input.length, output.length); + for (let channelNum = 0; channelNum < channelCount; channelNum++) { + input[channelNum].forEach((sample, i) => { + output[channelNum][i] = sample; + }); + } + } + const inputs = inputList[0]; + // There's latency at the beginning of a stream before recording starts + // Make sure we actually receive audio data before we start storing chunks + let sliceIndex = 0; + if (!this.foundAudio) { + for (const channel of inputs) { + sliceIndex = 0; // reset for each channel + if (this.foundAudio) { + break; + } + if (channel) { + for (const value of channel) { + if (value !== 0) { + // find only one non-zero entry in any channel + this.foundAudio = true; + break; + } else { + sliceIndex++; + } + } + } + } + } + if (inputs && inputs[0] && this.foundAudio && this.recording) { + // We need to copy the TypedArray, because the \`process\` + // internals will reuse the same buffer to hold each input + const chunk = inputs.map((input) => input.slice(sliceIndex)); + this.chunks.push(chunk); + this.sendChunk(chunk); + } + return true; + } +} + +registerProcessor('audio_processor', AudioProcessor); +`; + +const script = new Blob([AudioProcessorWorklet], { + type: 'application/javascript', +}); +const src = URL.createObjectURL(script); +export const AudioProcessorSrc = src; diff --git a/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/lib/worklets/stream_processor.js b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/lib/worklets/stream_processor.js new file mode 100644 index 0000000000..d3c794a88c --- /dev/null +++ b/examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/lib/worklets/stream_processor.js @@ -0,0 +1,96 @@ +export const StreamProcessorWorklet = ` +class StreamProcessor extends AudioWorkletProcessor { + constructor() { + super(); + this.hasStarted = false; + this.hasInterrupted = false; + this.outputBuffers = []; + this.bufferLength = 128; + this.write = { buffer: new Float32Array(this.bufferLength), trackId: null }; + this.writeOffset = 0; + this.trackSampleOffsets = {}; + this.port.onmessage = (event) => { + if (event.data) { + const payload = event.data; + if (payload.event === 'write') { + const int16Array = payload.buffer; + const float32Array = new Float32Array(int16Array.length); + for (let i = 0; i < int16Array.length; i++) { + float32Array[i] = int16Array[i] / 0x8000; // Convert Int16 to Float32 + } + this.writeData(float32Array, payload.trackId); + } else if ( + payload.event === 'offset' || + payload.event === 'interrupt' + ) { + const requestId = payload.requestId; + const trackId = this.write.trackId; + const offset = this.trackSampleOffsets[trackId] || 0; + this.port.postMessage({ + event: 'offset', + requestId, + trackId, + offset, + }); + if (payload.event === 'interrupt') { + this.hasInterrupted = true; + } + } else { + throw new Error(\`Unhandled event "\${payload.event}"\`); + } + } + }; + } + + writeData(float32Array, trackId = null) { + let { buffer } = this.write; + let offset = this.writeOffset; + for (let i = 0; i < float32Array.length; i++) { + buffer[offset++] = float32Array[i]; + if (offset >= buffer.length) { + 
this.outputBuffers.push(this.write); + this.write = { buffer: new Float32Array(this.bufferLength), trackId }; + buffer = this.write.buffer; + offset = 0; + } + } + this.writeOffset = offset; + return true; + } + + process(inputs, outputs, parameters) { + const output = outputs[0]; + const outputChannelData = output[0]; + const outputBuffers = this.outputBuffers; + if (this.hasInterrupted) { + this.port.postMessage({ event: 'stop' }); + return false; + } else if (outputBuffers.length) { + this.hasStarted = true; + const { buffer, trackId } = outputBuffers.shift(); + for (let i = 0; i < outputChannelData.length; i++) { + outputChannelData[i] = buffer[i] || 0; + } + if (trackId) { + this.trackSampleOffsets[trackId] = + this.trackSampleOffsets[trackId] || 0; + this.trackSampleOffsets[trackId] += buffer.length; + } + return true; + } else if (this.hasStarted) { + this.port.postMessage({ event: 'stop' }); + return false; + } else { + return true; + } + } +} + +registerProcessor('stream_processor', StreamProcessor); +`; + +const script = new Blob([StreamProcessorWorklet], { + type: 'application/javascript', +}); +const src = URL.createObjectURL(script); +export const StreamProcessorSrc = src;
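
The two worklets above are the other half of the `WavRecorder` / `WavStreamPlayer` classes added in this diff: the recorder loads `audio_processor.js` and receives `chunk` events from it, while the player loads `stream_processor.js` and posts `write` / `interrupt` events to it. Below is a minimal, hypothetical sketch of how an app might wire them together; the import paths, the 24 kHz sample rate, the `WavRecorder` constructor options, and the `sendToRealtime` / `pcm16Chunk` names are illustrative assumptions, not part of the PR.

```js
// Hypothetical wiring for the classes introduced in this diff.
// Import paths, sample rate, and sendToRealtime()/pcm16Chunk are placeholders.
import { WavRecorder } from './lib/wavtools/lib/wav_recorder.js';
import { WavStreamPlayer } from './lib/wavtools/lib/wav_stream_player.js';

// Constructor options for WavRecorder are assumed to mirror WavStreamPlayer's.
const recorder = new WavRecorder({ sampleRate: 24000 });
const player = new WavStreamPlayer({ sampleRate: 24000 });

// Capture: begin() requests the microphone and loads the audio_processor
// worklet; record() then streams { mono, raw } PCM16 chunks to the callback.
async function startCapture(sendToRealtime) {
  await recorder.begin();
  await recorder.record((chunk) => sendToRealtime(chunk.mono));
}

// Playback: connect() registers the stream_processor worklet; add16BitPCM()
// queues chunks on a named track, even ahead of the current play point.
async function playChunk(pcm16Chunk) {
  if (!player.analyser) {
    await player.connect();
  }
  player.add16BitPCM(pcm16Chunk, 'translation');
}

// interrupt() asks the worklet for the current sample offset and marks the
// track as interrupted so any late-arriving chunks are dropped.
async function stopPlayback() {
  const offset = await player.interrupt();
  return offset ? offset.currentTime : 0;
}
```
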