
Commit bbc417f

added Whisper CTranslate2 port

1 parent 2174217 commit bbc417f

File tree

5 files changed: +136 −0 lines changed


whisper-ct2/README.md

```markdown
# Whisper for CTranslate2

A port of OpenAI Whisper for CTranslate2.

Repositories:
- Whisper original: https://github.com/openai/whisper
- Faster Whisper: https://github.com/guillaumekln/faster-whisper

Tested with:
- Arm64 - Debian 11 - Python 3.9
```
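In practice the port is driven through the faster-whisper API; a minimal usage sketch, assuming a model already converted to `models/whisper-tiny-ct2` (see `get-model.sh` below) and a hypothetical 16 kHz mono WAV file `audio.wav`:

```python
# Minimal faster-whisper sketch; the model path and audio.wav are assumptions.
from faster_whisper import WhisperModel

model = WhisperModel("models/whisper-tiny-ct2", device="cpu", compute_type="int8")
segments, info = model.transcribe("audio.wav", beam_size=1)
print("Detected language: %s (p=%.2f)" % (info.language, info.language_probability))
for segment in segments:
    print("[%ds -> %ds] %s" % (segment.start, segment.end, segment.text))
```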

whisper-ct2/get-model.sh

```bash
#!/bin/bash
# Download a Whisper model from Hugging Face and convert it to CTranslate2 format.
model="tiny"
if [ -n "$1" ]; then
    model="$1"
else
    echo "Please specify the model to download."
    echo "Examples: tiny, tiny.en, base, small, ..."
    exit 1
fi
echo "Downloading and converting: openai/whisper-$model (from https://huggingface.co/openai) ..."
echo "NOTE: If the download fails, check the experiments repository for model files."
echo ""
mkdir -p models
if [ ! -d "models/whisper-$model-ct2" ]; then
    ct2-transformers-converter --model "openai/whisper-$model" --output_dir "models/whisper-$model-ct2" --quantization int8
else
    echo "Folder already exists: models/whisper-$model-ct2 - skipped"
fi
```
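If you prefer to keep everything in Python, the conversion step above can also be scripted via the `ctranslate2` package's `TransformersConverter` class, which the `ct2-transformers-converter` CLI wraps; a minimal sketch, assuming the `[conversion]` extra (i.e. `transformers`) is installed:

```python
# Python counterpart of the ct2-transformers-converter call above; a sketch,
# assuming the "transformers" dependency from the [conversion] extra is installed.
import ctranslate2

converter = ctranslate2.converters.TransformersConverter("openai/whisper-tiny")
converter.convert("models/whisper-tiny-ct2", quantization="int8")
```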

whisper-ct2/install.sh

```bash
#!/bin/bash
set -e
echo "Installing Whisper for CTranslate2 ..."
sudo apt update
sudo apt install -y --no-install-recommends python3-pip python3-dev python3-setuptools python3-wheel python3-venv
if [ -d "venv/" ]; then
    echo "Activating Python virtual env."
    source venv/bin/activate
else
    echo "Creating and activating Python virtual env."
    python3 -m venv venv && source venv/bin/activate
fi
echo "Installing packages ..."
pip3 install --upgrade pip
git clone https://github.com/guillaumekln/faster-whisper
cd faster-whisper
pip3 install -e .[conversion]   # with dependencies to convert models
#pip3 install -e .              # enough if you already have converted models
echo "Downloading models ..."
cd ..
#bash get-model.sh "tiny.en"
bash get-model.sh "tiny"
echo "DONE"
```

whisper-ct2/run-test.sh

```bash
#!/bin/bash
if [ -d "venv/" ]; then
    echo "Please make sure you've activated the Python virtual environment!"
    echo "Use: source venv/bin/activate"
else
    echo "No Python virtual environment found."
fi
echo ""
time python3 test.py --lang "auto" --beamsize 1 --threads 2 --model "models/whisper-tiny-ct2"
```

whisper-ct2/test.py

```python
import os
import re
import sys
import wave
import argparse
from timeit import default_timer as timer

parser = argparse.ArgumentParser(description="Running Whisper CTranslate2 test inference.")
parser.add_argument("-f", "--folder", default="../test-files/", help="Folder with WAV input files")
parser.add_argument("-m", "--model", default="models/whisper-tiny-ct2", help="Path to model")
parser.add_argument("-l", "--lang", default="auto", help="Language used (default: auto)")
parser.add_argument("-t", "--threads", type=int, default=2, help="Threads used (default: 2)")
parser.add_argument("-b", "--beamsize", type=int, default=1, help="Beam size used (default: 1)")
args = parser.parse_args()

print("Importing WhisperModel")
from faster_whisper import WhisperModel

# Run on CPU with INT8 quantization:
model_path = args.model
print(f'\nLoading model {model_path} ...')
model = WhisperModel(model_path, device="cpu", compute_type="int8", cpu_threads=args.threads)
#model = WhisperModel(args.model, device="cuda", compute_type="float16")
print(f'Threads: {args.threads}')
print(f'Beam size: {args.beamsize}')


def transcribe(audio_file):
    print(f'\nLoading audio file: {audio_file}')
    # Validate the input: the test expects 16 kHz mono 16-bit PCM WAV files.
    wf = wave.open(audio_file, "rb")
    sample_rate_orig = wf.getframerate()
    audio_length = wf.getnframes() / sample_rate_orig
    if (wf.getnchannels() != 1 or wf.getsampwidth() != 2
            or wf.getcomptype() != "NONE" or sample_rate_orig != 16000):
        print("Audio file must be WAV format mono PCM.")
        sys.exit(1)
    wf.close()
    print(f'Samplerate: {sample_rate_orig}, length: {audio_length}s')

    # Take the language code from the file name if present, e.g. "en_test.wav" -> "en"
    file_lang = None
    lang_search = re.findall(r"(?:^|/)(\w\w)_", audio_file)
    if len(lang_search) > 0:
        file_lang = lang_search.pop()

    inference_start = timer()

    print("\nTranscribing ...")
    segments = None
    info = None
    if "tiny.en" in model_path:
        # English-only model: skip files whose name indicates another language
        if file_lang is not None and file_lang != "en":
            print(f"Language found in file name: {file_lang}")
            print("Skipped file to avoid issues with tiny.en model")
        else:
            segments, info = model.transcribe(audio_file, beam_size=args.beamsize)
            print("Model language fixed to 'en'")
    elif args.lang == "auto":
        if file_lang is not None:
            segments, info = model.transcribe(audio_file, beam_size=args.beamsize, language=file_lang)
            print(f"Language found in file name: {file_lang}")
        else:
            segments, info = model.transcribe(audio_file, beam_size=args.beamsize)
            print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
    else:
        segments, info = model.transcribe(audio_file, beam_size=args.beamsize, language=args.lang)
        print(f'Pre-defined language: {args.lang}')

    if segments is not None:
        print("Result:")
        for segment in segments:
            print("[%ds -> %ds] %s" % (segment.start, segment.end, segment.text))

    print("\nInference took {:.2f}s for {:.2f}s audio file.".format(
        timer() - inference_start, audio_length))


test_files = os.listdir(args.folder)
for file in test_files:
    if file.endswith(".wav"):
        transcribe(os.path.join(args.folder, file))
```
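One subtlety in `test.py`: faster-whisper's `transcribe()` returns the segments as a generator, so most of the decoding work happens while the result is iterated in the print loop, not inside the `transcribe()` call itself; the reported time therefore includes the printing pass. A minimal sketch of forcing decoding up front (reusing `model` from above and a hypothetical `audio.wav`), in case you want to time inference in isolation:

```python
from timeit import default_timer as timer

start = timer()
segments, info = model.transcribe("audio.wav", beam_size=1)  # returns almost immediately
segments = list(segments)  # the actual decoding runs during this iteration
print("Full decode took {:.2f}s".format(timer() - start))
for segment in segments:
    print("[%ds -> %ds] %s" % (segment.start, segment.end, segment.text))
```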
