From 16c492eebd3ceeebde6838e9ba4b1a47dd7f9fe9 Mon Sep 17 00:00:00 2001
From: Guillaume Loulier
Date: Sat, 16 Aug 2025 19:59:49 +0200
Subject: [PATCH] feat(platform): ElevenLabs stream for TTS

---
Reviewer notes (text between the "---" line and the diffstat is not part of
the commit message):

* This patch was rebuilt from a copy whose line breaks and indentation had
  been collapsed. Hunk line counts were checked against the hunk headers, but
  the indentation of context lines is assumed to be the usual 4-space style;
  verify against the tree (or apply with `git apply --ignore-whitespace`).
* Text in angle brackets appears to have been stripped from that copy: the
  author e-mail on the From: line and, presumably, the array generics in the
  `@param array ...` docblock lines of ElevenLabsClient.php are still
  missing. The `<?php` tag and the first lines of the license header of the
  new example were restored from the standard Symfony header; confirm.
* The blob ids on the `index` lines are the submitted ones and no longer
  match the contents changed below.

Changes made on top of the submitted patch:

* examples: removed the unused `$content = '';` assignment and added a
  comment on the streaming loop.
* ElevenLabsResultConverter::convert(): the `stream` arm is now evaluated
  after the speech-to-text arm, so a speech-to-text response is still
  converted to a TextResult when the `stream` option is set.
* ElevenLabsResultConverter::convertToGenerator(): added a docblock.
* ElevenLabsClientTest: the new streaming test passes the API key and the
  host URL in the order PlatformFactory uses, and asserts that the `/stream`
  endpoint is the one being requested.
* Not addressed: ElevenLabsConverterTest has no case for the StreamResult
  branch yet.

 .../elevenlabs/text-to-speech-as-stream.php   | 34 +++++++++++++++++
 .../Bridge/ElevenLabs/ElevenLabsClient.php    | 12 ++++--
 .../ElevenLabs/ElevenLabsResultConverter.php  | 28 ++++++++++++++
 .../src/Bridge/ElevenLabs/PlatformFactory.php |  2 +-
 .../ElevenLabs/ElevenLabsClientTest.php       | 37 ++++++++++++++++++-
 .../ElevenLabs/ElevenLabsConverterTest.php    |  7 ++--
 6 files changed, 110 insertions(+), 10 deletions(-)
 create mode 100644 examples/elevenlabs/text-to-speech-as-stream.php

diff --git a/examples/elevenlabs/text-to-speech-as-stream.php b/examples/elevenlabs/text-to-speech-as-stream.php
new file mode 100644
index 000000000..abe696449
--- /dev/null
+++ b/examples/elevenlabs/text-to-speech-as-stream.php
@@ -0,0 +1,34 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabs;
+use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
+use Symfony\AI\Platform\Message\Content\Text;
+
+require_once dirname(__DIR__).'/bootstrap.php';
+
+$platform = PlatformFactory::create(
+    apiKey: env('ELEVEN_LABS_API_KEY'),
+    httpClient: http_client(),
+);
+$model = new ElevenLabs(options: [
+    'voice' => 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
+    'stream' => true,
+]);
+
+$result = $platform->invoke($model, new Text('The first move is what sets everything in motion.'));
+
+// Each chunk is a raw piece of the response body, yielded as it is received.
+foreach ($result->asStream() as $chunk) {
+    echo $chunk;
+}
+
+echo \PHP_EOL;
diff --git a/src/platform/src/Bridge/ElevenLabs/ElevenLabsClient.php b/src/platform/src/Bridge/ElevenLabs/ElevenLabsClient.php
index aef1f5bf8..c5626ee2d 100644
--- a/src/platform/src/Bridge/ElevenLabs/ElevenLabsClient.php
+++ b/src/platform/src/Bridge/ElevenLabs/ElevenLabsClient.php
@@ -42,7 +42,7 @@ public function request(Model $model, array|string $payload, array $options = []
         }
 
         if (\in_array($model->getName(), [ElevenLabs::SCRIBE_V1, ElevenLabs::SCRIBE_V1_EXPERIMENTAL], true)) {
-            return $this->doSpeechToTextRequest($model, $payload, $options);
+            return $this->doSpeechToTextRequest($model, $payload);
         }
 
         $capabilities = $this->retrieveCapabilities($model);
@@ -56,9 +56,8 @@ public function request(Model $model, array|string $payload, array $options = []
 
     /**
      * @param array $payload
-     * @param array $options
      */
-    private function doSpeechToTextRequest(Model $model, array|string $payload, array $options): RawHttpResult
+    private function doSpeechToTextRequest(Model $model, array|string $payload): RawHttpResult
     {
         return new RawHttpResult($this->httpClient->request('POST', \sprintf('%s/speech-to-text', $this->hostUrl), [
             'headers' => [
@@ -86,8 +85,13 @@ private function doTextToSpeechRequest(Model $model, array|string $payload, arra
         }
 
         $voice = $options['voice'] ??= $model->getOptions()['voice'];
+        $stream = $options['stream'] ??= $model->getOptions()['stream'] ?? false;
+
+        $url = $stream
+            ? \sprintf('%s/text-to-speech/%s/stream', $this->hostUrl, $voice)
+            : \sprintf('%s/text-to-speech/%s', $this->hostUrl, $voice);
 
-        return new RawHttpResult($this->httpClient->request('POST', \sprintf('%s/text-to-speech/%s', $this->hostUrl, $voice), [
+        return new RawHttpResult($this->httpClient->request('POST', $url, [
             'headers' => [
                 'xi-api-key' => $this->apiKey,
             ],
diff --git a/src/platform/src/Bridge/ElevenLabs/ElevenLabsResultConverter.php b/src/platform/src/Bridge/ElevenLabs/ElevenLabsResultConverter.php
index 18cc33caa..7dbe8a604 100644
--- a/src/platform/src/Bridge/ElevenLabs/ElevenLabsResultConverter.php
+++ b/src/platform/src/Bridge/ElevenLabs/ElevenLabsResultConverter.php
@@ -16,8 +16,10 @@
 use Symfony\AI\Platform\Result\BinaryResult;
 use Symfony\AI\Platform\Result\RawResultInterface;
 use Symfony\AI\Platform\Result\ResultInterface;
+use Symfony\AI\Platform\Result\StreamResult;
 use Symfony\AI\Platform\Result\TextResult;
 use Symfony\AI\Platform\ResultConverterInterface;
+use Symfony\Contracts\HttpClient\HttpClientInterface;
 use Symfony\Contracts\HttpClient\ResponseInterface;
 
 /**
@@ -25,6 +27,11 @@
  */
 final readonly class ElevenLabsResultConverter implements ResultConverterInterface
 {
+    public function __construct(
+        private HttpClientInterface $httpClient,
+    ) {
+    }
+
     public function supports(Model $model): bool
     {
         return $model instanceof ElevenLabs;
@@ -36,9 +43,30 @@ public function convert(RawResultInterface $result, array $options = []): Result
         $response = $result->getObject();
 
         return match (true) {
             str_contains($response->getInfo('url'), 'speech-to-text') => new TextResult($result->getData()['text']),
+            \array_key_exists('stream', $options) && $options['stream'] => new StreamResult($this->convertToGenerator($response)),
             str_contains($response->getInfo('url'), 'text-to-speech') => new BinaryResult($result->getObject()->getContent(), 'audio/mpeg'),
             default => throw new RuntimeException('Unsupported ElevenLabs response.'),
         };
     }
+
+    /**
+     * Yields the content of each streamed chunk, skipping the first, last and empty chunks.
+     *
+     * @return \Generator<int, string>
+     */
+    private function convertToGenerator(ResponseInterface $response): \Generator
+    {
+        foreach ($this->httpClient->stream($response) as $chunk) {
+            if ($chunk->isFirst() || $chunk->isLast()) {
+                continue;
+            }
+
+            if ('' === $chunk->getContent()) {
+                continue;
+            }
+
+            yield $chunk->getContent();
+        }
+    }
 }
diff --git a/src/platform/src/Bridge/ElevenLabs/PlatformFactory.php b/src/platform/src/Bridge/ElevenLabs/PlatformFactory.php
index c61416f0d..84792fb7d 100644
--- a/src/platform/src/Bridge/ElevenLabs/PlatformFactory.php
+++ b/src/platform/src/Bridge/ElevenLabs/PlatformFactory.php
@@ -32,7 +32,7 @@ public static function create(
 
         return new Platform(
             [new ElevenLabsClient($httpClient, $apiKey, $hostUrl)],
-            [new ElevenLabsResultConverter()],
+            [new ElevenLabsResultConverter($httpClient)],
             $contract ?? ElevenLabsContract::create(),
         );
     }
diff --git a/src/platform/tests/Bridge/ElevenLabs/ElevenLabsClientTest.php b/src/platform/tests/Bridge/ElevenLabs/ElevenLabsClientTest.php
index f6c9a0fc2..9656e4eee 100644
--- a/src/platform/tests/Bridge/ElevenLabs/ElevenLabsClientTest.php
+++ b/src/platform/tests/Bridge/ElevenLabs/ElevenLabsClientTest.php
@@ -12,7 +12,6 @@
 namespace Symfony\AI\Platform\Tests\Bridge\ElevenLabs;
 
 use PHPUnit\Framework\Attributes\CoversClass;
-use PHPUnit\Framework\Attributes\Group;
 use PHPUnit\Framework\Attributes\UsesClass;
 use PHPUnit\Framework\TestCase;
 use Symfony\AI\Platform\Bridge\ElevenLabs\Contract\AudioNormalizer;
@@ -21,6 +20,7 @@
 use Symfony\AI\Platform\Exception\InvalidArgumentException;
 use Symfony\AI\Platform\Message\Content\Audio;
 use Symfony\AI\Platform\Model;
+use Symfony\AI\Platform\Result\RawHttpResult;
 use Symfony\Component\HttpClient\MockHttpClient;
 use Symfony\Component\HttpClient\Response\JsonMockResponse;
 use Symfony\Component\HttpClient\Response\MockResponse;
@@ -30,6 +30,7 @@
 #[UsesClass(Model::class)]
 #[UsesClass(Audio::class)]
 #[UsesClass(AudioNormalizer::class)]
+#[UsesClass(RawHttpResult::class)]
 final class ElevenLabsClientTest extends TestCase
 {
     public function testSupportsModel()
@@ -133,7 +134,6 @@ public function testClientCannotPerformTextToSpeechRequestWithoutValidPayload()
         ]), []);
     }
 
-    #[Group('foo')]
     public function testClientCanPerformTextToSpeechRequest()
     {
         $payload = Audio::fromFile(\dirname(__DIR__, 5).'/fixtures/audio.mp3');
@@ -162,4 +162,37 @@ public function testClientCanPerformTextToSpeechRequest()
 
         $this->assertSame(2, $httpClient->getRequestsCount());
     }
+
+    public function testClientCanPerformTextToSpeechRequestAsStream()
+    {
+        $payload = Audio::fromFile(\dirname(__DIR__, 5).'/fixtures/audio.mp3');
+        $streamResponse = new MockResponse($payload->asBinary());
+
+        $httpClient = new MockHttpClient([
+            new JsonMockResponse([
+                [
+                    'model_id' => ElevenLabs::ELEVEN_MULTILINGUAL_V2,
+                    'can_do_text_to_speech' => true,
+                ],
+            ]),
+            $streamResponse,
+        ]);
+
+        $client = new ElevenLabsClient(
+            $httpClient,
+            'my-api-key',
+            'https://api.elevenlabs.io/v1',
+        );
+
+        $result = $client->request(new ElevenLabs(options: [
+            'voice' => 'Dslrhjl3ZpzrctukrQSN',
+            'stream' => true,
+        ]), [
+            'text' => 'foo',
+        ]);
+
+        $this->assertInstanceOf(RawHttpResult::class, $result);
+        $this->assertSame(2, $httpClient->getRequestsCount());
+        $this->assertStringEndsWith('/text-to-speech/Dslrhjl3ZpzrctukrQSN/stream', $streamResponse->getRequestUrl());
+    }
 }
diff --git a/src/platform/tests/Bridge/ElevenLabs/ElevenLabsConverterTest.php b/src/platform/tests/Bridge/ElevenLabs/ElevenLabsConverterTest.php
index 6425ce558..df09588c6 100644
--- a/src/platform/tests/Bridge/ElevenLabs/ElevenLabsConverterTest.php
+++ b/src/platform/tests/Bridge/ElevenLabs/ElevenLabsConverterTest.php
@@ -20,6 +20,7 @@
 use Symfony\AI\Platform\Result\BinaryResult;
 use Symfony\AI\Platform\Result\InMemoryRawResult;
 use Symfony\AI\Platform\Result\TextResult;
+use Symfony\Component\HttpClient\MockHttpClient;
 
 #[CoversClass(ElevenLabsResultConverter::class)]
 #[UsesClass(ElevenLabs::class)]
@@ -31,7 +32,7 @@ final class ElevenLabsConverterTest extends TestCase
 {
     public function testSupportsModel()
     {
-        $converter = new ElevenLabsResultConverter();
+        $converter = new ElevenLabsResultConverter(new MockHttpClient());
 
         $this->assertTrue($converter->supports(new ElevenLabs()));
         $this->assertFalse($converter->supports(new Model('any-model')));
@@ -39,7 +40,7 @@ public function testSupportsModel()
 
     public function testConvertSpeechToTextResponse()
     {
-        $converter = new ElevenLabsResultConverter();
+        $converter = new ElevenLabsResultConverter(new MockHttpClient());
         $rawResult = new InMemoryRawResult([
             'text' => 'Hello there',
         ], new class {
@@ -57,7 +58,7 @@ public function getInfo(): string
 
     public function testConvertTextToSpeechResponse()
     {
-        $converter = new ElevenLabsResultConverter();
+        $converter = new ElevenLabsResultConverter(new MockHttpClient());
         $rawResult = new InMemoryRawResult([], new class {
             public function getInfo(): string
             {