Skip to content

Commit 494bb85

Browse files
Merge pull request #4 from HouseoLogy/master
improve translation process
2 parents 8d48d60 + 316bf01 commit 494bb85

File tree

4 files changed

+62
-67
lines changed

4 files changed

+62
-67
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
.venv
2+
tmp/
3+
.DS_Store

README.md

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,32 @@ Python script that translates pptx files using Amazon Translate service.
44

55
## Installation
66

7-
```
7+
```bash
88
$ virtualenv venv
99
$ source venv/bin/activate
1010
$ pip install -r requirements.txt
1111
```
1212

1313
## Usage
14+
15+
Basic translation:
16+
```bash
17+
python pptx-translator.py source_language_code target_language_code input_file_path
18+
```
19+
20+
Example execution:
21+
```bash
22+
python pptx-translator.py ja en input-file.pptx
23+
```
24+
25+
For more information on available options:
26+
```bash
27+
python pptx-translator.py --help
28+
```
29+
30+
## Command-line Arguments
31+
1432
```
15-
$ python pptx-translator.py --help
1633
usage: Translates pptx files from source language to target language using Amazon Translate service
1734
[-h] [--terminology TERMINOLOGY]
1835
source_language_code target_language_code input_file_path
@@ -30,10 +47,15 @@ optional arguments:
3047
The path of the terminology CSV file
3148
```
3249

50+
## Features
51+
52+
- Translates PowerPoint (.pptx) files from one language to another using Amazon Translate
53+
- Supports custom terminology for translation
54+
3355
## Security
3456

3557
See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information.
3658

3759
## License
3860

39-
This library is licensed under the MIT-0 License. See the LICENSE file.
61+
This library is licensed under the MIT-0 License. See the LICENSE file.

pptx-translator.py

Lines changed: 32 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,9 @@
1-
#!/usr/bin/env python
2-
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3-
# SPDX-License-Identifier: MIT-0
4-
51
import argparse
6-
72
import boto3
8-
93
from botocore.exceptions import ClientError
104
from pptx import Presentation
115
from pptx.enum.lang import MSO_LANGUAGE_ID
126

13-
147
LANGUAGE_CODE_TO_LANGUAGE_ID = {
158
"""
169
Dict that maps Amazon Translate language code to MSO_LANGUAGE_ID enum value.
@@ -72,72 +65,53 @@
7265
'uk': MSO_LANGUAGE_ID.UKRAINIAN,
7366
'ur': MSO_LANGUAGE_ID.URDU,
7467
'vi': MSO_LANGUAGE_ID.VIETNAMESE,
75-
'zh': MSO_LANGUAGE_ID.CHINESE_SINGAPORE ,
68+
'zh': MSO_LANGUAGE_ID.CHINESE_SINGAPORE,
7669
'zh-TW': MSO_LANGUAGE_ID.CHINESE_HONG_KONG_SAR,
7770
}
7871

7972
TERMINOLOGY_NAME = 'pptx-translator-terminology'
8073

81-
8274
translate = boto3.client(service_name='translate')
8375

84-
8576
def translate_presentation(presentation, source_language_code, target_language_code, terminology_names):
86-
slide_number = 1
87-
for slide in presentation.slides:
88-
print('Slide {slide_number} of {number_of_slides}'.format(
89-
slide_number=slide_number,
90-
number_of_slides=len(presentation.slides)))
91-
slide_number += 1
92-
93-
# translate comments
77+
for slide_index, slide in enumerate(presentation.slides, start=1):
78+
print(f'Slide {slide_index} of {len(presentation.slides)}')
79+
80+
for shape in slide.shapes:
81+
if shape.has_table:
82+
for row in shape.table.rows:
83+
for cell in row.cells:
84+
translate_text_frame(cell.text_frame, source_language_code, target_language_code, terminology_names)
85+
elif shape.has_text_frame:
86+
translate_text_frame(shape.text_frame, source_language_code, target_language_code, terminology_names)
87+
9488
if slide.has_notes_slide:
95-
text_frame = slide.notes_slide.notes_text_frame
96-
if len(text_frame.text) > 0:
89+
translate_text_frame(slide.notes_slide.notes_text_frame, source_language_code, target_language_code, terminology_names)
90+
91+
def translate_text_frame(text_frame, source_language_code, target_language_code, terminology_names):
92+
for paragraph in text_frame.paragraphs:
93+
for run in paragraph.runs:
94+
if run.text.strip():
9795
try:
9896
response = translate.translate_text(
99-
Text=text_frame.text,
100-
SourceLanguageCode=source_language_code,
101-
TargetLanguageCode=target_language_code,
102-
TerminologyNames=terminology_names)
103-
slide.notes_slide.notes_text_frame.text = response.get('TranslatedText')
97+
Text=run.text,
98+
SourceLanguageCode=source_language_code,
99+
TargetLanguageCode=target_language_code,
100+
TerminologyNames=terminology_names
101+
)
102+
# original text if translation fails
103+
run.text = response.get('TranslatedText', run.text)
104104
except ClientError as client_error:
105-
if (client_error.response['Error']['Code'] == 'ValidationException'):
106-
# Text not valid. Maybe the size of the text exceeds the size limit of the service.
107-
# Amazon Translate limits: https://docs.aws.amazon.com/translate/latest/dg/what-is-limits.html
108-
# We just ignore and don't translate the text.
105+
if client_error.response['Error']['Code'] == 'ValidationException':
109106
print('Invalid text. Ignoring...')
110107

111-
# translate other texts
112-
for shape in slide.shapes:
113-
if not shape.has_text_frame:
114-
continue
115-
for paragraph in shape.text_frame.paragraphs:
116-
for index, paragraph_run in enumerate(paragraph.runs):
117-
try:
118-
response = translate.translate_text(
119-
Text=paragraph_run.text,
120-
SourceLanguageCode=source_language_code,
121-
TargetLanguageCode=target_language_code,
122-
TerminologyNames=terminology_names)
123-
paragraph.runs[index].text = response.get('TranslatedText')
124-
paragraph.runs[index].font.language_id = LANGUAGE_CODE_TO_LANGUAGE_ID[target_language_code]
125-
except ClientError as client_error:
126-
if (client_error.response['Error']['Code'] == 'ValidationException'):
127-
# Text not valid. Maybe the size of the text exceeds the size limit of the service.
128-
# Amazon Translate limits: https://docs.aws.amazon.com/translate/latest/dg/what-is-limits.html
129-
# We just ignore and don't translate the text.
130-
print('Invalid text. Ignoring...')
131-
132-
133108
def import_terminology(terminology_file_path):
134-
print('Importing terminology data from {file_path}...'.format(file_path=terminology_file_path))
109+
print(f'Importing terminology data from {terminology_file_path}...')
135110
with open(terminology_file_path, 'rb') as f:
136111
translate.import_terminology(Name=TERMINOLOGY_NAME,
137112
MergeStrategy='OVERWRITE',
138113
TerminologyData={'File': bytearray(f.read()), 'Format': 'CSV'})
139114

140-
141115
def main():
142116
argument_parser = argparse.ArgumentParser(
143117
'Translates pptx files from source language to target language using Amazon Translate service')
@@ -160,21 +134,17 @@ def main():
160134
import_terminology(args.terminology)
161135
terminology_names = [TERMINOLOGY_NAME]
162136

163-
print('Translating {file_path} from {source_language_code} to {target_language_code}...'.format(
164-
file_path=args.input_file_path,
165-
source_language_code=args.source_language_code,
166-
target_language_code=args.target_language_code))
137+
print(f'Translating {args.input_file_path} from {args.source_language_code} to {args.target_language_code}...')
167138
presentation = Presentation(args.input_file_path)
168139
translate_presentation(presentation,
169140
args.source_language_code,
170141
args.target_language_code,
171142
terminology_names)
172143

173144
output_file_path = args.input_file_path.replace(
174-
'.pptx', '-{language_code}.pptx'.format(language_code=args.target_language_code))
175-
print('Saving {output_file_path}...'.format(output_file_path=output_file_path))
145+
'.pptx', f'-{args.target_language_code}.pptx')
146+
print(f'Saving {output_file_path}...')
176147
presentation.save(output_file_path)
177148

178-
179-
if __name__== '__main__':
180-
main()
149+
if __name__ == '__main__':
150+
main()

requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
boto3==1.11.14
2-
python-pptx==0.6.18
1+
boto3==1.35.42
2+
python-pptx==1.0.2

0 commit comments

Comments
 (0)