Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions src/subtitle_tool/ai.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,19 +204,23 @@ class AISubtitler:
temperature (float): model temperature
temperature_adj (float): by how much the model temperature will be
adjusted for retries after incorrect content generation.
media_lang (str): Media language
subtitle_lang (str): Subtitle language
system_prompt (str): system prompt driving the model. There is
a default prompt already provided, override only if necessary.
"""

model_name: str
api_key: str
subtitle_lang: str
media_lang: str = "English"
delete_temp_files: bool = True
temperature: float = 0.1
temperature_adj: float = 0.01
system_prompt: str = """
# YOUR ROLE
- You work as a transcriber of audio clips for English, delivering perfect transcriptions.
- You know many languages, and you can recognize the language spoken in the audio and write the subtitle accordingly.
- You work as a transcriber of audio clips in %s %s, delivering perfect transcriptions %s.
- You know many languages, and you can recognize the language spoken in the audio and write the subtitle accordingly.
- Your work is to take an audio file and output a high-quality, perfect transcription synchronized with spoken dialogue,
- You strictly follow the JSON format specified, and your output is only the subtitle content in this JSON format.
- You *DO NOT* subtitle music or music moods.
Expand Down Expand Up @@ -298,6 +302,16 @@ class AISubtitler:
def __post_init__(self) -> None:
    """Finish initialisation after the dataclass fields are set.

    Creates the ``genai`` client from ``api_key``, creates a fresh
    ``OperationMetrics`` instance, and fills the three ``%s``
    placeholders of ``system_prompt`` with, in order:

    1. ``media_lang``
    2. ``"to <subtitle_lang>"`` (empty when ``subtitle_lang`` is unset)
    3. ``"in <subtitle_lang>"`` (empty when ``subtitle_lang`` is unset)

    If ``system_prompt`` cannot be formatted with these three values, it
    is left untouched and a warning is logged instead of raising.
    """
    self.client = genai.Client(api_key=self.api_key)
    self.metrics = OperationMetrics()

    # A falsy subtitle_lang (None or "") means no target language was
    # requested, so both language fragments collapse to empty strings.
    if self.subtitle_lang:
        subtitle_lang_prompt = f"to {self.subtitle_lang}"
        subtitle_lang_directive = f"in {self.subtitle_lang}"
    else:
        subtitle_lang_prompt = ""
        subtitle_lang_directive = ""

    try:
        self.system_prompt = self.system_prompt % (
            self.media_lang,
            subtitle_lang_prompt,
            subtitle_lang_directive,
        )
    except (TypeError, ValueError) as exc:
        # %-formatting raises when the prompt does not contain exactly
        # three compatible placeholders or contains a stray literal "%".
        # A caller-supplied prompt written without placeholders must keep
        # working, so use it verbatim rather than crash on construction.
        logger.warning(
            "System prompt could not be formatted with language settings; "
            "using it verbatim: %s",
            exc,
        )

def _ai_retry_handler(self, exception: BaseException) -> bool:
"""
Expand Down Expand Up @@ -554,6 +568,8 @@ def _generate_subtitles(
]

user_prompt = f"Create subtitles for this audio file that has a duration of {duration} milliseconds" # noqa: E501
logger.debug(f"User prompt: {user_prompt}")
logger.debug(f"System prompt: {self.system_prompt}")

response = self.client.models.generate_content(
model=self.model_name,
Expand Down
12 changes: 11 additions & 1 deletion src/subtitle_tool/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,10 @@ def setup_logging(verbose=False, debug=False):
default=5,
show_default=True,
)
@click.option(
"--media-lang", help="Media file language", type=click.STRING, default="English"
)
@click.option("--subtitle-lang", help="Language for subtitle output", type=click.STRING)
def main(
mediafile: Path,
api_key: str,
Expand All @@ -135,6 +139,8 @@ def main(
keep_temp_files: bool,
audio_segment_length: int,
parallel_segments: int,
media_lang: str,
subtitle_lang: str,
) -> None:
"""Generate subtitles for a media file"""
setup_logging(debug=debug, verbose=verbose)
Expand Down Expand Up @@ -172,7 +178,11 @@ def main(
click.echo(f"Generating subtitles with {ai_model}...")

subtitler = AISubtitler(
api_key=api_key, model_name=ai_model, delete_temp_files=not keep_temp_files
api_key=api_key,
model_name=ai_model,
delete_temp_files=not keep_temp_files,
media_lang=media_lang,
subtitle_lang=subtitle_lang,
)

executor = ThreadPoolExecutor(max_workers=parallel_segments)
Expand Down
4 changes: 2 additions & 2 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading