Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
23 changes: 20 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# pptx-translator

Python script that translates pptx files using Amazon Translate service.
Python script that translates pptx files using Amazon Bedrock (Claude Sonnet) or Amazon Translate.

## Installation

Expand All @@ -13,8 +13,8 @@ $ pip install -r requirements.txt
## Usage
```
$ python pptx-translator.py --help
usage: Translates pptx files from source language to target language using Amazon Translate service
[-h] [--terminology TERMINOLOGY]
usage: Translates pptx files from source language to target language using Amazon Translate service or Bedrock-based translation
[-h] [--terminology TERMINOLOGY] [--use-bedrock]
source_language_code target_language_code input_file_path

positional arguments:
Expand All @@ -28,8 +28,25 @@ optional arguments:
-h, --help show this help message and exit
--terminology TERMINOLOGY
The path of the terminology CSV file
--use-bedrock Use Bedrock-based translation with Claude Sonet model
```

### Using Amazon Translate
To translate a presentation using Amazon Translate:

```
python pptx-translator.py en es input.pptx
```

### Using Bedrock-based Translation
To translate a presentation using Bedrock-based translation with the Claude Sonnet model:

```
python pptx-translator.py en es input.pptx --use-bedrock
```

This will use the Claude Sonnet model to perform the translation, which may provide improved results for certain language pairs or content types. Note: Bedrock-based translation does not use the terminology file.

## Security

See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information.
Expand Down
99 changes: 84 additions & 15 deletions pptx-translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
import argparse

import boto3
import json

from botocore.exceptions import ClientError
from botocore.config import Config
from pptx import Presentation
from pptx.enum.lang import MSO_LANGUAGE_ID

Expand Down Expand Up @@ -78,11 +80,62 @@

TERMINOLOGY_NAME = 'pptx-translator-terminology'

# Shared botocore client configuration: 'standard' retry mode with
# max_attempts set to 10.
CONFIG = Config(retries=dict(max_attempts=10, mode='standard'))

translate = boto3.client(service_name='translate')

translate = boto3.client(service_name='translate', config=CONFIG)
bedrock = boto3.client(service_name='bedrock-runtime', config=CONFIG)

def translate_presentation(presentation, source_language_code, target_language_code, terminology_names):
def bedrock_translate(text, source_language_code, target_language_code):
    """Translate *text* with the Claude 3 Sonnet model on Amazon Bedrock.

    Args:
        text: The text to translate.
        source_language_code: Language code of *text*. Accepted so the
            signature parallels the Amazon Translate call; it is not
            included in the prompt sent to the model.
        target_language_code: Language code to translate into; inserted
            into the prompt as the requested language.

    Returns:
        The text of the first content item in the model response. When
        *text* is empty or whitespace-only it is returned unchanged and
        no request is made.

    Errors raised by ``bedrock.invoke_model`` are not handled here and
    propagate to the caller.
    """
    # Nothing to translate: sending a blank string to the model would make
    # it answer with a remark about the missing text, and that remark would
    # then replace the (blank) original in the presentation.
    if not text or not text.strip():
        return text

    # Progress indicator: one dot per request sent to Bedrock.
    print('.', end='')
    prompt = f"""
Human:
Translate [Text] into [Language]. Understand the meaning of [text] and find relevant
words that best suite a PowerPoint presentation. Respond with the translated text only.

Language = {target_language_code}
Text= {text}


A:
Here is the translated text:
"""
    # Request body in the Anthropic Messages format expected by Bedrock.
    body = json.dumps(
        {
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": 1000,
            # Temperature 0.0 keeps the output as repeatable as possible.
            "temperature": 0.0,
            "system": "You are a expert translator fluent in multiple language",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt
                        }
                    ]
                }
            ]
        }
    )

    response = bedrock.invoke_model(
        modelId="anthropic.claude-3-sonnet-20240229-v1:0",
        body=body
    )

    # The response body is a stream containing a JSON document; the
    # translation is the text of its first content item.
    response_body = json.loads(response.get('body').read())
    translated_text = response_body["content"][0]["text"]
    return translated_text


def translate_presentation(presentation, source_language_code, target_language_code, terminology_names, use_bedrock=False):
slide_number = 1
for slide in presentation.slides:
print('Slide {slide_number} of {number_of_slides}'.format(
Expand All @@ -95,18 +148,23 @@ def translate_presentation(presentation, source_language_code, target_language_c
text_frame = slide.notes_slide.notes_text_frame
if len(text_frame.text) > 0:
try:
response = translate.translate_text(
Text=text_frame.text,
SourceLanguageCode=source_language_code,
TargetLanguageCode=target_language_code,
TerminologyNames=terminology_names)
slide.notes_slide.notes_text_frame.text = response.get('TranslatedText')
if use_bedrock:
translated_text = bedrock_translate(text_frame.text, source_language_code, target_language_code)
slide.notes_slide.notes_text_frame.text = translated_text
else:
response = translate.translate_text(
Text=text_frame.text,
SourceLanguageCode=source_language_code,
TargetLanguageCode=target_language_code,
TerminologyNames=terminology_names)
slide.notes_slide.notes_text_frame.text = response.get('TranslatedText')
except ClientError as client_error:
if (client_error.response['Error']['Code'] == 'ValidationException'):
# Text not valid. Maybe the size of the text exceeds the size limit of the service.
# Amazon Translate limits: https://docs.aws.amazon.com/translate/latest/dg/what-is-limits.html
# We just ignore and don't translate the text.
print('Invalid text. Ignoring...')
print(json.dumps(client_error.response))

# translate other texts
for shape in slide.shapes:
Expand All @@ -115,19 +173,26 @@ def translate_presentation(presentation, source_language_code, target_language_c
for paragraph in shape.text_frame.paragraphs:
for index, paragraph_run in enumerate(paragraph.runs):
try:
response = translate.translate_text(
Text=paragraph_run.text,
SourceLanguageCode=source_language_code,
TargetLanguageCode=target_language_code,
TerminologyNames=terminology_names)
paragraph.runs[index].text = response.get('TranslatedText')
if use_bedrock:
translated_text = bedrock_translate(paragraph_run.text, source_language_code, target_language_code)
paragraph.runs[index].text = translated_text
else:
response = translate.translate_text(
Text=paragraph_run.text,
SourceLanguageCode=source_language_code,
TargetLanguageCode=target_language_code,
TerminologyNames=terminology_names)
paragraph.runs[index].text = response.get('TranslatedText')
paragraph.runs[index].font.language_id = LANGUAGE_CODE_TO_LANGUAGE_ID[target_language_code]
except ClientError as client_error:
if (client_error.response['Error']['Code'] == 'ValidationException'):
# Text not valid. Maybe the size of the text exceeds the size limit of the service.
# Amazon Translate limits: https://docs.aws.amazon.com/translate/latest/dg/what-is-limits.html
# We just ignore and don't translate the text.
print('Invalid text. Ignoring...')
print(json.dumps(client_error.response))
exit(1)



def import_terminology(terminology_file_path):
Expand All @@ -153,6 +218,9 @@ def main():
argument_parser.add_argument(
'--terminology', type=str,
help='The path of the terminology CSV file')
argument_parser.add_argument(
'--use-bedrock', action='store_true',
help='Use Bedrock-based translation with Claude Sonet model')
args = argument_parser.parse_args()

terminology_names = []
Expand All @@ -168,7 +236,8 @@ def main():
translate_presentation(presentation,
args.source_language_code,
args.target_language_code,
terminology_names)
terminology_names,
args.use_bedrock)

output_file_path = args.input_file_path.replace(
'.pptx', '-{language_code}.pptx'.format(language_code=args.target_language_code))
Expand Down
14 changes: 12 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,12 @@
boto3==1.11.14
python-pptx==0.6.18
boto3==1.35.24
botocore==1.35.24
jmespath==1.0.1
lxml==5.3.0
pillow==10.4.0
python-dateutil==2.9.0.post0
python-pptx==1.0.2
s3transfer==0.10.2
six==1.16.0
typing_extensions==4.12.2
urllib3==2.2.3
XlsxWriter==3.2.0