Skip to content

Commit 4e9ad6d

Browse files
committed
Fixes and improvements for recognize_assemblyai() method:
- Adds support for `AudioData` instances. Before, it only worked with a path to a file
- Adds more error handling
- Removes the inner `read_file` function, since the requests module automatically handles chunking
- Removes "content-type" from the headers, since it is not needed
- Adds a docstring
- Adds an example code snippet in `examples/audio_transcribe.py`
- Lists AssemblyAI in the README
1 parent 8b07762 commit 4e9ad6d

File tree

3 files changed

+94
-28
lines changed

3 files changed

+94
-28
lines changed

README.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ Speech recognition engine/API support:
3030
* `CMU Sphinx <http://cmusphinx.sourceforge.net/wiki/>`__ (works offline)
3131
* Google Speech Recognition
3232
* `Google Cloud Speech API <https://cloud.google.com/speech/>`__
33+
* `AssemblyAI API <https://www.assemblyai.com/>`__
3334
* `Wit.ai <https://wit.ai/>`__
3435
* `Microsoft Azure Speech <https://azure.microsoft.com/en-us/services/cognitive-services/speech/>`__
3536
* `Microsoft Bing Voice Recognition (Deprecated) <https://www.microsoft.com/cognitive-services/en-us/speech-api>`__
@@ -202,7 +203,7 @@ The solution is to decrease this threshold, or call ``recognizer_instance.adjust
202203
The recognizer doesn't understand my particular language/dialect.
203204
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
204205

205-
Try setting the recognition language to your language/dialect. To do this, see the documentation for ``recognizer_instance.recognize_sphinx``, ``recognizer_instance.recognize_google``, ``recognizer_instance.recognize_wit``, ``recognizer_instance.recognize_bing``, ``recognizer_instance.recognize_api``, ``recognizer_instance.recognize_houndify``, and ``recognizer_instance.recognize_ibm``.
206+
Try setting the recognition language to your language/dialect. To do this, see the documentation for ``recognizer_instance.recognize_sphinx``, ``recognizer_instance.recognize_google``, ``recognizer_instance.recognize_wit``, ``recognizer_instance.recognize_bing``, ``recognizer_instance.recognize_api``, ``recognizer_instance.recognize_houndify``, ``recognizer_instance.recognize_ibm``, and ``recognizer_instance.recognize_assemblyai``.
206207

207208
For example, if your language/dialect is British English, it is better to use ``"en-GB"`` as the language rather than ``"en-US"``.
208209

examples/audio_transcribe.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,3 +87,22 @@
8787
print("IBM Speech to Text could not understand audio")
8888
except sr.RequestError as e:
8989
print("Could not request results from IBM Speech to Text service; {0}".format(e))
90+
91+
# recognize speech using the AssemblyAI API
92+
ASSEMBLYAI_API_TOKEN = "INSERT ASSEMBLYAI API TOKEN HERE" # Get a Free token at https://www.assemblyai.com/
93+
try:
94+
r.recognize_assemblyai(audio, api_token=ASSEMBLYAI_API_TOKEN)
95+
except sr.TranscriptionNotReady as e:
96+
job_name = e.job_name
97+
except sr.TranscriptionFailed as e:
98+
print(e)
99+
except sr.RequestError as e:
100+
print("Could not request results from AssemblyAI service; {0}".format(e))
101+
102+
# wait a little bit, then query the transcript with the job_name...
103+
try:
104+
print("AssemblyAI thinks you said " + r.recognize_assemblyai(audio_data=None, api_token=ASSEMBLYAI_API_TOKEN, job_name=job_name)[0])
105+
except sr.TranscriptionFailed as e:
106+
print(e)
107+
except sr.RequestError as e:
108+
print("Could not request results from AssemblyAI service; {0}".format(e))

speech_recognition/__init__.py

Lines changed: 73 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1289,33 +1289,51 @@ def recognize_amazon(self, audio_data, bucket_name=None, access_key_id=None, sec
12891289

12901290
def recognize_assemblyai(self, audio_data, api_token, job_name=None, **kwargs):
12911291
"""
1292-
Wraps the AssemblyAI STT service.
1292+
Performs speech recognition using the AssemblyAI API.
1293+
12931294
https://www.assemblyai.com/
1295+
1296+
Args:
1297+
audio_data: Can be an ``AudioData`` instance or a str with a path to a file.
1298+
api_token: An AssemblyAI API token.
1299+
job_name: The name of the job which corresponds to the transcription id. If no job_name is given, it submits the file for transcription
1300+
and raises a ``speech_recognition.TranscriptionNotReady`` exception. The final transcript can then be queried at a later time
1301+
by passing the job_name.
1302+
1303+
Raises a ``speech_recognition.TranscriptionFailed`` exception if the speech recognition operation failed or if the key isn't valid.
1304+
Raises a ``speech_recognition.RequestError`` exception if API requests failed, e.g. if there is no internet connection.
1305+
1306+
Example:
1307+
```
1308+
try:
1309+
r.recognize_assemblyai(audio_data=audio, api_token=your_token)
1310+
except sr.TranscriptionNotReady as e:
1311+
job_name = e.job_name
1312+
1313+
# wait a little bit...
1314+
result = r.recognize_assemblyai(audio_data=None, api_token=your_token, job_name=job_name)
1315+
```
12941316
"""
12951317

1296-
def read_file(filename, chunk_size=5242880):
1297-
with open(filename, 'rb') as _file:
1298-
while True:
1299-
data = _file.read(chunk_size)
1300-
if not data:
1301-
break
1302-
yield data
1318+
headers = {"authorization": api_token}
13031319

13041320
check_existing = audio_data is None and job_name
13051321
if check_existing:
13061322
# Query status.
13071323
transciption_id = job_name
13081324
endpoint = f"https://api.assemblyai.com/v2/transcript/{transciption_id}"
1309-
headers = {
1310-
"authorization": api_token,
1311-
}
1312-
response = requests.get(endpoint, headers=headers)
1325+
1326+
try:
1327+
response = requests.get(endpoint, headers=headers)
1328+
except requests.exceptions.RequestException as e:
1329+
raise RequestError("recognition request failed: {}".format(e.reason))
1330+
13131331
data = response.json()
13141332
status = data['status']
13151333

13161334
if status == 'error':
13171335
# Handle error.
1318-
exc = TranscriptionFailed()
1336+
exc = TranscriptionFailed("Transcription failed: {}".format(data["error"]))
13191337
exc.job_name = None
13201338
exc.file_key = None
13211339
raise exc
@@ -1332,24 +1350,52 @@ def read_file(filename, chunk_size=5242880):
13321350
exc.file_key = None
13331351
raise exc
13341352
else:
1335-
# Upload file.
1336-
headers = {'authorization': api_token}
1337-
response = requests.post('https://api.assemblyai.com/v2/upload',
1338-
headers=headers,
1339-
data=read_file(audio_data))
1340-
upload_url = response.json()['upload_url']
1353+
# Upload file and queue for transcription.
1354+
# This path raises a TranscriptionNotReady error whose job_name attribute holds the transcription id.
1355+
# The job_name can then be used at a later point to query the transcript.
1356+
if isinstance(audio_data, AudioData):
1357+
# convert to flac first
1358+
upload_data = audio_data.get_flac_data(
1359+
convert_rate=None if audio_data.sample_rate >= 8000 else 8000, # audio samples should be at least 8 kHz
1360+
convert_width=None if audio_data.sample_width >= 2 else 2 # audio samples should be at least 16-bit
1361+
)
1362+
else:
1363+
# assume audio_data is a path to a file that can be uploaded directly
1364+
upload_data = audio_data
1365+
1366+
try:
1367+
response = requests.post('https://api.assemblyai.com/v2/upload',
1368+
headers=headers,
1369+
data=upload_data)
1370+
except requests.exceptions.RequestException as e:
1371+
raise RequestError("recognition request failed: {}".format(e.reason))
1372+
1373+
data = response.json()
1374+
if "error" in data:
1375+
exc = TranscriptionFailed("Transcription failed: {}".format(data["error"]))
1376+
exc.job_name = None
1377+
exc.file_key = None
1378+
raise exc
1379+
1380+
upload_url = data['upload_url']
13411381

13421382
# Queue file for transcription.
13431383
endpoint = "https://api.assemblyai.com/v2/transcript"
1344-
json = {
1345-
"audio_url": upload_url
1346-
}
1347-
headers = {
1348-
"authorization": api_token,
1349-
"content-type": "application/json"
1350-
}
1351-
response = requests.post(endpoint, json=json, headers=headers)
1384+
json = { "audio_url": upload_url }
1385+
1386+
try:
1387+
response = requests.post(endpoint, json=json, headers=headers)
1388+
except requests.exceptions.RequestException as e:
1389+
raise RequestError("recognition request failed: {}".format(e.reason))
1390+
13521391
data = response.json()
1392+
1393+
if "error" in data:
1394+
exc = TranscriptionFailed("Transcription failed: {}".format(data["error"]))
1395+
exc.job_name = None
1396+
exc.file_key = None
1397+
raise exc
1398+
13531399
transciption_id = data['id']
13541400
exc = TranscriptionNotReady()
13551401
exc.job_name = transciption_id

0 commit comments

Comments
 (0)