4040 TranscriptionResultNotFound
4141)
4242from .base import GoogleService
43+ from .locations import get_speech_location
4344
4445# https://cloud.google.com/speech-to-text/docs/quotas
4546ASYNC_MAX_LENGTH = timedelta (minutes = 479 )
46- DEFAULT_SPEECH_LOCATION = 'global'
47- DEFAULT_SPEECH_MODEL = 'long'
47+
48+ # Fallback STT model used when a language has no `model_code` set in the
49+ # `TranscriptionServiceLanguageM2M` database table. 'chirp_3' is chosen over
50+ # 'long' because it is available for every language in the 'us' and 'eu'
51+ # multi-region endpoints, and it supports all recognition features
52+ # (e.g. enable_automatic_punctuation)
53+ DEFAULT_SPEECH_MODEL = 'chirp_3'
4854
4955
5056class GoogleTranscriptionService (GoogleService ):
@@ -58,6 +64,7 @@ def __init__(self, submission: dict, asset: 'kpi.models.Asset', *args, **kwargs)
5864 class. It uses Google Cloud Speech-to-Text v2 batch API.
5965 """
6066 super ().__init__ (submission = submission , asset = asset , * args , ** kwargs )
67+ self .speech_location = get_speech_location ()
6168
6269 def adapt_response (self , response : Union [dict , list ]) -> str :
6370 """
@@ -102,7 +109,6 @@ def begin_google_operation(
102109 target_lang : str ,
103110 content : Any ,
104111 * ,
105- location_code : str | None = None ,
106112 model_code : str | None = None ,
107113 ) -> tuple [object , int ]:
108114 """
@@ -115,27 +121,31 @@ def begin_google_operation(
115121 'Audio file of duration %s is too long.' % duration
116122 )
117123
118- speech_location = location_code or DEFAULT_SPEECH_LOCATION
119124 speech_model = model_code or DEFAULT_SPEECH_MODEL
120- speech_client = self ._get_speech_client (speech_location )
125+ speech_client = self ._get_speech_client (self . speech_location )
121126 input_path , output_prefix = self ._get_batch_paths (xpath , source_lang )
122127
123128 logging .info (
124129 'Starting Google automatic transcription for '
125130 f'{ self .submission_root_uuid = } , { xpath = } , { source_lang = } , '
126- f'{ speech_location = } , { speech_model = } '
131+ f'{ self . speech_location = } , { speech_model = } '
127132 )
128133 self ._cleanup_batch_files (xpath , source_lang )
129134 gcs_input_uri = self .store_file (flac_content , input_path )
130135
131136 request = speech .BatchRecognizeRequest (
132- recognizer = self ._get_recognizer_name (speech_location ),
137+ recognizer = self ._get_recognizer_name (self . speech_location ),
133138 config = speech .RecognitionConfig (
134139 auto_decoding_config = speech .AutoDetectDecodingConfig (),
135140 language_codes = [source_lang ],
136141 model = speech_model ,
137142 features = speech .RecognitionFeatures (
138- enable_automatic_punctuation = True
143+ # chirp_3, chirp_2, and chirp support automatic punctuation
144+ # for all languages. 'long' does not support it for several
145+ # languages, including the 6 legacy African languages
146+ # (Kinyarwanda, Swati, Southern Sotho, Tswana, Tsonga, Venda),
147+ # and will return a 400 error if enabled
148+ enable_automatic_punctuation = (speech_model != 'long' ),
139149 ),
140150 ),
141151 files = [speech .BatchRecognizeFileMetadata (uri = gcs_input_uri )],
@@ -152,6 +162,11 @@ def begin_google_operation(
152162 def counter_name (self ):
153163 return 'google_asr_seconds'
154164
165+ def get_client_options (self ):
166+ return client_options .ClientOptions (
167+ api_endpoint = f'{ self .speech_location } -speech.googleapis.com'
168+ )
169+
155170 def get_converted_audio (
156171 self , xpath : str , submission_uuid : int , user : object
157172 ) -> Union [bytes , tuple [bytes , timedelta ]]:
@@ -227,7 +242,6 @@ def process_data(
227242 source_lang = source_language ,
228243 target_lang = None ,
229244 content = converted_audio ,
230- location_code = language_config .location_code ,
231245 model_code = language_config .model_code ,
232246 )
233247 except AudioTooLongError as err :
@@ -306,7 +320,6 @@ def process_data(
306320 # read the batch result after Google reports completion
307321 operation_payload = self ._get_operation_payload (
308322 operation_name ,
309- language_config .location_code ,
310323 )
311324 if not operation_payload .get ('done' ):
312325 raise SubsequenceTimeoutError
@@ -449,12 +462,12 @@ def _get_speech_client(self, location: str):
449462 """
450463 Create a Speech client bound to the configured regional endpoint
451464 """
452- client_kwargs = { 'credentials' : self . credentials }
453- if location != DEFAULT_SPEECH_LOCATION :
454- client_kwargs [ ' client_options' ] = client_options .ClientOptions (
465+ return speech . SpeechClient (
466+ credentials = self . credentials ,
467+ client_options = client_options .ClientOptions (
455468 api_endpoint = f'{ location } -speech.googleapis.com'
456- )
457- return speech . SpeechClient ( ** client_kwargs )
469+ ),
470+ )
458471
459472 def _get_recognizer_name (self , location : str ) -> str :
460473 """
@@ -468,14 +481,11 @@ def _get_recognizer_name(self, location: str) -> str:
468481 def _get_operation_payload (
469482 self ,
470483 operation_name : str ,
471- location_code : str | None = None ,
472484 ) -> dict :
473485 """
474486 Poll the Google long-running operation backing the batch request.
475487 """
476- speech_client = self ._get_speech_client (
477- location_code or DEFAULT_SPEECH_LOCATION
478- )
488+ speech_client = self ._get_speech_client (self .speech_location )
479489 operation = speech_client .transport .operations_client .get_operation (
480490 operation_name
481491 )
0 commit comments