From fc810feb3c762abc8ac18853148fcf196f1df312 Mon Sep 17 00:00:00 2001 From: Mudabir Hussain Date: Mon, 7 Oct 2024 15:56:17 +0400 Subject: [PATCH 1/2] feature: Add common TranscriptionModel interface for audio transcription - Created TranscriptionModel interface that extends Model - Implemented `call(AudioTranscriptionPrompt)` method for better compatibility between OpenAI and Azure OpenAI transcription models - Added default convenience methods for handling Resource and AudioTranscriptionOptions to return transcription as a String --- .../AzureOpenAiAudioTranscriptionModel.java | 4 ++-- .../openai/OpenAiAudioTranscriptionModel.java | 4 ++-- .../ai/model/TranscriptionModel.java | 21 +++++++++++++++++++ 3 files changed, 25 insertions(+), 4 deletions(-) create mode 100644 spring-ai-core/src/main/java/org/springframework/ai/model/TranscriptionModel.java diff --git a/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiAudioTranscriptionModel.java b/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiAudioTranscriptionModel.java index 1d1e4afd941..39649a7db71 100644 --- a/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiAudioTranscriptionModel.java +++ b/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiAudioTranscriptionModel.java @@ -29,8 +29,8 @@ import org.springframework.ai.azure.openai.AzureOpenAiAudioTranscriptionOptions.StructuredResponse.Word; import org.springframework.ai.azure.openai.AzureOpenAiAudioTranscriptionOptions.TranscriptResponseFormat; import org.springframework.ai.azure.openai.metadata.AzureOpenAiAudioTranscriptionResponseMetadata; -import org.springframework.ai.model.Model; import org.springframework.ai.model.ModelOptionsUtils; +import org.springframework.ai.model.TranscriptionModel; import org.springframework.core.io.Resource; import org.springframework.util.Assert; import 
org.springframework.util.StringUtils; @@ -45,7 +45,7 @@ * * @author Piotr Olaszewski */ -public class AzureOpenAiAudioTranscriptionModel implements Model<AudioTranscriptionPrompt, AudioTranscriptionResponse> { +public class AzureOpenAiAudioTranscriptionModel implements TranscriptionModel { private static final List<AudioTranscriptionFormat> JSON_FORMATS = List.of(AudioTranscriptionFormat.JSON, AudioTranscriptionFormat.VERBOSE_JSON); diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioTranscriptionModel.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioTranscriptionModel.java index fbf51bb78ed..516f680ce16 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioTranscriptionModel.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioTranscriptionModel.java @@ -35,7 +35,7 @@ import org.slf4j.LoggerFactory; import org.springframework.ai.chat.metadata.RateLimit; -import org.springframework.ai.model.Model; +import org.springframework.ai.model.TranscriptionModel; import org.springframework.ai.openai.api.OpenAiAudioApi; import org.springframework.ai.openai.api.OpenAiAudioApi.StructuredResponse; import org.springframework.ai.audio.transcription.AudioTranscription; @@ -60,7 +60,7 @@ * @see OpenAiAudioApi * @since 0.8.1 */ -public class OpenAiAudioTranscriptionModel implements Model<AudioTranscriptionPrompt, AudioTranscriptionResponse> { +public class OpenAiAudioTranscriptionModel implements TranscriptionModel { private final Logger logger = LoggerFactory.getLogger(getClass()); diff --git a/spring-ai-core/src/main/java/org/springframework/ai/model/TranscriptionModel.java b/spring-ai-core/src/main/java/org/springframework/ai/model/TranscriptionModel.java new file mode 100644 index 00000000000..2af110e5074 --- /dev/null +++ b/spring-ai-core/src/main/java/org/springframework/ai/model/TranscriptionModel.java @@ -0,0 +1,21 @@ +package org.springframework.ai.model; + +import org.springframework.ai.audio.transcription.AudioTranscriptionOptions; +import 
org.springframework.ai.audio.transcription.AudioTranscriptionPrompt; +import org.springframework.ai.audio.transcription.AudioTranscriptionResponse; +import org.springframework.core.io.Resource; + +public interface TranscriptionModel extends Model<AudioTranscriptionPrompt, AudioTranscriptionResponse> { + + AudioTranscriptionResponse call(AudioTranscriptionPrompt transcriptionPrompt); + + default String transcribe(Resource resource) { + AudioTranscriptionPrompt prompt = new AudioTranscriptionPrompt(resource); + return this.call(prompt).getResult().getOutput(); + } + + default String transcribe(Resource resource, AudioTranscriptionOptions options) { + AudioTranscriptionPrompt prompt = new AudioTranscriptionPrompt(resource, options); + return this.call(prompt).getResult().getOutput(); + } +} From 4c8ce3546fe2a6db4a0d36548a328cf909deb76c Mon Sep 17 00:00:00 2001 From: Mudabir Hussain Date: Mon, 7 Oct 2024 18:08:15 +0400 Subject: [PATCH 2/2] Spring Java Format Fix --- .../java/org/springframework/ai/model/TranscriptionModel.java | 1 + 1 file changed, 1 insertion(+) diff --git a/spring-ai-core/src/main/java/org/springframework/ai/model/TranscriptionModel.java b/spring-ai-core/src/main/java/org/springframework/ai/model/TranscriptionModel.java index 2af110e5074..475b5f49fcc 100644 --- a/spring-ai-core/src/main/java/org/springframework/ai/model/TranscriptionModel.java +++ b/spring-ai-core/src/main/java/org/springframework/ai/model/TranscriptionModel.java @@ -18,4 +18,5 @@ default String transcribe(Resource resource, AudioTranscriptionOptions options) AudioTranscriptionPrompt prompt = new AudioTranscriptionPrompt(resource, options); return this.call(prompt).getResult().getOutput(); } + }