diff --git a/src/subtitle_tool/ai.py b/src/subtitle_tool/ai.py index 72288d2..1e51a23 100644 --- a/src/subtitle_tool/ai.py +++ b/src/subtitle_tool/ai.py @@ -204,19 +204,23 @@ class AISubtitler: temperature (float): model temperature temperature_adj (float): by how much the model temperature will be adjusted for retries after incorrect content generation. + media_lang (str): Media language + subtitle_lang (str): Subtitle language system_prompt (str): system prompt driving the model. There is a default prompt already provided, override only if necessary. """ model_name: str api_key: str + subtitle_lang: str + media_lang: str = "English" delete_temp_files: bool = True temperature: float = 0.1 temperature_adj: float = 0.01 system_prompt: str = """ # YOUR ROLE - - You work as a transcriber of audio clips for English, delivering perfect transcriptions. - - You know many languages, and you can recognize the language spoken in the audio and write the subtitle accordingly. + - You work as a transcriber of audio clips in %s %s, delivering perfect transcriptions %s. + - You know many languages, and you can recognize the language spoken in the audio and write the subtitle accordingly. - Your work is to take an audio file and output a high-quality, perfect transcription synchronized with spoken dialogue, - You strictly follow the JSON format specified, and your output is only the subtitle content in this JSON format. - You *DO NOT* subtitle music or music moods. @@ -298,6 +302,16 @@ class AISubtitler: def __post_init__(self): self.client = genai.Client(api_key=self.api_key) self.metrics = OperationMetrics() + subtitle_lang_prompt = "" + subtitle_lang_directive = "" + if self.subtitle_lang: + subtitle_lang_prompt = f"to {self.subtitle_lang}" + subtitle_lang_directive = f"in {self.subtitle_lang}" + self.system_prompt = self.system_prompt % ( + self.media_lang, + subtitle_lang_prompt, + subtitle_lang_directive, + ) def _ai_retry_handler(self, exception: BaseException) -> bool: """ @@ -554,6 +568,8 @@ def _generate_subtitles( ] user_prompt = f"Create subtitles for this audio file that has a duration of {duration} milliseconds" # noqa: E501 + logger.debug(f"User prompt: {user_prompt}") + logger.debug(f"System prompt: {self.system_prompt}") response = self.client.models.generate_content( model=self.model_name, diff --git a/src/subtitle_tool/cli.py b/src/subtitle_tool/cli.py index 11fe4be..2118062 100755 --- a/src/subtitle_tool/cli.py +++ b/src/subtitle_tool/cli.py @@ -125,6 +125,10 @@ def setup_logging(verbose=False, debug=False): default=5, show_default=True, ) +@click.option( + "--media-lang", help="Media file language", type=click.STRING, default="English" +) +@click.option("--subtitle-lang", help="Language for subtitle output", type=click.STRING) def main( mediafile: Path, api_key: str, @@ -135,6 +139,8 @@ def main( keep_temp_files: bool, audio_segment_length: int, parallel_segments: int, + media_lang: str, + subtitle_lang: str, ) -> None: """Generate subtitles for a media file""" setup_logging(debug=debug, verbose=verbose) @@ -172,7 +178,11 @@ def main( click.echo(f"Generating subtitles with {ai_model}...") subtitler = AISubtitler( - api_key=api_key, model_name=ai_model, delete_temp_files=not keep_temp_files + api_key=api_key, + model_name=ai_model, + delete_temp_files=not keep_temp_files, + media_lang=media_lang, + subtitle_lang=subtitle_lang, ) executor = ThreadPoolExecutor(max_workers=parallel_segments) diff --git a/uv.lock b/uv.lock index 9c8cae3..ad3047c 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.13" [[package]] @@ -492,7 +492,7 @@ wheels = [ [[package]] name = "subtitle-tool" -version = "0.1.52" +version = "0.1.56" source = { editable = "." } dependencies = [ { name = "audioop-lts" },