1- #!/usr/bin/env python
2- # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3- # SPDX-License-Identifier: MIT-0
4-
51import argparse
6-
72import boto3
8-
93from botocore .exceptions import ClientError
104from pptx import Presentation
115from pptx .enum .lang import MSO_LANGUAGE_ID
126
13-
147LANGUAGE_CODE_TO_LANGUAGE_ID = {
158"""
169Dict that maps Amazon Translate language code to MSO_LANGUAGE_ID enum value.
7265 'uk' : MSO_LANGUAGE_ID .UKRAINIAN ,
7366 'ur' : MSO_LANGUAGE_ID .URDU ,
7467 'vi' : MSO_LANGUAGE_ID .VIETNAMESE ,
75- 'zh' : MSO_LANGUAGE_ID .CHINESE_SINGAPORE ,
68+ 'zh' : MSO_LANGUAGE_ID .CHINESE_SINGAPORE ,
7669 'zh-TW' : MSO_LANGUAGE_ID .CHINESE_HONG_KONG_SAR ,
7770}
7871
7972TERMINOLOGY_NAME = 'pptx-translator-terminology'
8073
81-
8274translate = boto3 .client (service_name = 'translate' )
8375
84-
def translate_presentation(presentation, source_language_code, target_language_code, terminology_names):
    """Translate all text of a presentation in place.

    For every slide, the text of table cells, of shapes that carry a text
    frame, and of the speaker notes is translated run by run through
    ``translate_text_frame``. Progress is printed once per slide.

    Args:
        presentation: python-pptx ``Presentation`` object; modified in place.
        source_language_code: Amazon Translate language code of the input text.
        target_language_code: Amazon Translate language code to translate to.
        terminology_names: List of custom terminology names forwarded to
            Amazon Translate (may be empty).

    Raises:
        ClientError: Propagated from ``translate_text_frame`` for any service
            error other than ``ValidationException``.
    """
    slides = presentation.slides
    number_of_slides = len(slides)

    for slide_index, slide in enumerate(slides, start=1):
        print(f'Slide {slide_index} of {number_of_slides}')

        for shape in slide.shapes:
            if shape.has_table:
                for row in shape.table.rows:
                    for cell in row.cells:
                        translate_text_frame(
                            cell.text_frame, source_language_code, target_language_code, terminology_names)
            elif shape.has_text_frame:
                translate_text_frame(
                    shape.text_frame, source_language_code, target_language_code, terminology_names)

        if slide.has_notes_slide:
            notes_text_frame = slide.notes_slide.notes_text_frame
            # python-pptx returns None when the notes slide has no notes
            # placeholder; there is nothing to translate in that case.
            if notes_text_frame is not None:
                translate_text_frame(
                    notes_text_frame, source_language_code, target_language_code, terminology_names)


def translate_text_frame(text_frame, source_language_code, target_language_code, terminology_names):
    """Translate every non-blank run of a text frame in place.

    Translating run by run (instead of the whole frame) keeps the character
    formatting that python-pptx stores per run.

    Args:
        text_frame: python-pptx text frame whose runs are rewritten in place.
        source_language_code: Amazon Translate language code of the input text.
        target_language_code: Amazon Translate language code to translate to.
        terminology_names: List of custom terminology names forwarded to
            Amazon Translate (may be empty).

    Raises:
        ClientError: For any Amazon Translate error other than
            ``ValidationException``, which is reported and skipped.
    """
    for paragraph in text_frame.paragraphs:
        for run in paragraph.runs:
            # Skip empty / whitespace-only runs: nothing to translate and the
            # service rejects empty text.
            if not run.text.strip():
                continue

            try:
                response = translate.translate_text(
                    Text=run.text,
                    SourceLanguageCode=source_language_code,
                    TargetLanguageCode=target_language_code,
                    TerminologyNames=terminology_names,
                )
            except ClientError as client_error:
                if client_error.response['Error']['Code'] != 'ValidationException':
                    # Throttling, permission or connectivity problems must not
                    # be hidden: the output would silently stay untranslated.
                    raise
                # Text not valid, e.g. it exceeds the service size limit.
                # Leave this run untranslated and carry on with the next one.
                print('Invalid text. Ignoring...')
                continue

            # Keep the original text if the response carries no translation.
            run.text = response.get('TranslatedText', run.text)


def import_terminology(terminology_file_path):
    """Upload a CSV terminology file to Amazon Translate.

    The file is imported under the name ``TERMINOLOGY_NAME`` with the
    ``OVERWRITE`` merge strategy, so it replaces any terminology already
    stored under that name.

    Args:
        terminology_file_path: Path of the CSV terminology file to upload.
    """
    print(f'Importing terminology data from {terminology_file_path}...')
    # Binary mode: the service receives the raw file bytes.
    with open(terminology_file_path, 'rb') as f:
        translate.import_terminology(Name=TERMINOLOGY_NAME,
                                     MergeStrategy='OVERWRITE',
                                     TerminologyData={'File': bytearray(f.read()), 'Format': 'CSV'})
139114
140-
141115def main ():
142116 argument_parser = argparse .ArgumentParser (
143117 'Translates pptx files from source language to target language using Amazon Translate service' )
@@ -160,21 +134,17 @@ def main():
160134 import_terminology (args .terminology )
161135 terminology_names = [TERMINOLOGY_NAME ]
162136
163- print ('Translating {file_path} from {source_language_code} to {target_language_code}...' .format (
164- file_path = args .input_file_path ,
165- source_language_code = args .source_language_code ,
166- target_language_code = args .target_language_code ))
137+ print (f'Translating { args .input_file_path } from { args .source_language_code } to { args .target_language_code } ...' )
167138 presentation = Presentation (args .input_file_path )
168139 translate_presentation (presentation ,
169140 args .source_language_code ,
170141 args .target_language_code ,
171142 terminology_names )
172143
173144 output_file_path = args .input_file_path .replace (
174- '.pptx' , '-{language_code }.pptx' . format ( language_code = args . target_language_code ) )
175- print ('Saving {output_file_path}...' . format ( output_file_path = output_file_path ) )
145+ '.pptx' , f '-{ args . target_language_code } .pptx' )
146+ print (f 'Saving { output_file_path } ...' )
176147 presentation .save (output_file_path )
177148

# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()
0 commit comments