Update the dependency package faster-whisper to version 0.10.0
faster-whisper now officially supports the large-v3 model, so update the large-v3 model URL in the config to point at the official version.
- app.py +9 -3
- config.json5 +1 -2
- requirements-fasterWhisper.txt +1 -1
- requirements.txt +1 -1
- src/whisper/fasterWhisperContainer.py +0 -4
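With the version floor raised, the `large-v3` alias resolves inside faster-whisper itself, which is what lets the config entry and the container workaround below be simplified. A minimal loading sketch, assuming faster-whisper >= 0.10.0 is installed (the audio path is a placeholder, not from this repo):

```python
# Minimal sketch, assuming faster-whisper >= 0.10.0: the "large-v3" alias now
# resolves to the official converted checkpoint, no mel-filter patching needed.
from faster_whisper import WhisperModel

model = WhisperModel("large-v3", device="auto", compute_type="default")
segments, info = model.transcribe("audio.wav")  # placeholder input file
for segment in segments:
    print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
```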
app.py CHANGED

```diff
@@ -137,7 +137,7 @@ class WhisperTranscriber:
         vadOptions = VadOptions(vad, vadMergeWindow, vadMaxMergeSize, self.app_config.vad_padding, self.app_config.vad_prompt_window, self.app_config.vad_initial_prompt_mode)
 
         if diarization:
-            if diarization_speakers < 1:
+            if diarization_speakers is not None and diarization_speakers < 1:
                 self.set_diarization(auth_token=self.app_config.auth_token, min_speakers=diarization_min_speakers, max_speakers=diarization_max_speakers)
             else:
                 self.set_diarization(auth_token=self.app_config.auth_token, num_speakers=diarization_speakers, min_speakers=diarization_min_speakers, max_speakers=diarization_max_speakers)
@@ -189,7 +189,7 @@ class WhisperTranscriber:
 
         # Set diarization
         if diarization:
-            if diarization_speakers < 1:
+            if diarization_speakers is not None and diarization_speakers < 1:
                 self.set_diarization(auth_token=self.app_config.auth_token, min_speakers=diarization_min_speakers, max_speakers=diarization_max_speakers)
             else:
                 self.set_diarization(auth_token=self.app_config.auth_token, num_speakers=diarization_speakers, min_speakers=diarization_min_speakers, max_speakers=diarization_max_speakers)
@@ -209,7 +209,8 @@ class WhisperTranscriber:
         try:
             progress(0, desc="init audio sources")
             sources = self.__get_source(urlData, multipleFiles, microphoneData)
-
+            if (len(sources) == 0):
+                raise Exception("init audio sources failed...")
             try:
                 progress(0, desc="init whisper model")
                 whisper_lang = get_language_from_name(languageName)
@@ -361,6 +362,11 @@ class WhisperTranscriber:
 
         except ExceededMaximumDuration as e:
             return [], ("[ERROR]: Maximum remote video length is " + str(e.maxDuration) + "s, file was " + str(e.videoDuration) + "s"), "[ERROR]"
+        except Exception as e:
+            import traceback
+            print(traceback.format_exc())
+            return [], ("Error occurred during transcribe: " + str(e)), ""
+
 
     def transcribe_file(self, model: AbstractWhisperContainer, audio_path: str, language: str, task: str = None,
                         vadOptions: VadOptions = VadOptions(),
```
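The `is not None` guard added in the two diarization hunks is load-bearing: in Python 3, an ordering comparison between `None` and an `int` raises `TypeError`, so the old `diarization_speakers < 1` would crash whenever the speaker count was unset. A standalone illustration (not code from app.py):

```python
# Why the guard matters: ordering comparisons with None raise in Python 3.
diarization_speakers = None
try:
    if diarization_speakers < 1:
        pass
except TypeError as e:
    print(e)  # "'<' not supported between instances of 'NoneType' and 'int'"

# The patched condition short-circuits before the comparison is attempted:
if diarization_speakers is not None and diarization_speakers < 1:
    print("auto speaker count")
```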
config.json5 CHANGED

```diff
@@ -28,8 +28,7 @@
     },
     {
         "name": "large-v3",
-        "url": "
-        "type": "huggingface"
+        "url": "large-v3"
     },
     // Uncomment to add custom Japanese models
     //{
```
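With the `"type": "huggingface"` entry dropped, the bare string `"large-v3"` is handed to faster-whisper, which downloads the converted model itself. A hedged sketch of that resolution; `Systran/faster-whisper-large-v3` is the repo id recent faster-whisper releases map the alias to, stated here as an assumption rather than something defined in this repo:

```python
# Hedged sketch: download_model accepts either a size alias or a full
# Hugging Face repo id and returns the local directory of the CTranslate2
# model. The explicit repo id below is an assumed equivalent of the alias.
from faster_whisper.utils import download_model

path_from_alias = download_model("large-v3")
path_from_repo_id = download_model("Systran/faster-whisper-large-v3")
print(path_from_alias, path_from_repo_id)
```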
requirements-fasterWhisper.txt CHANGED

```diff
@@ -1,6 +1,6 @@
 git+https://github.com/huggingface/transformers
 ctranslate2>=3.21.0
-faster-whisper
+faster-whisper>=0.10.0
 ffmpeg-python==0.2.0
 gradio==3.50.2
 yt-dlp
```
requirements.txt CHANGED

```diff
@@ -1,6 +1,6 @@
 git+https://github.com/huggingface/transformers
 ctranslate2>=3.21.0
-faster-whisper
+faster-whisper>=0.10.0
 ffmpeg-python==0.2.0
 gradio==3.50.2
 yt-dlp
```
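Both requirements files now set a `>=0.10.0` floor, the first release with built-in large-v3 support. If you want the app to fail fast on an older install, a small runtime check is enough (sketch only; a plain `X.Y.Z` version string is assumed, pre-release suffixes would need a real parser such as `packaging.version`):

```python
# Sketch: refuse to run against a faster-whisper older than 0.10.0.
# importlib.metadata is in the standard library on Python 3.8+.
from importlib.metadata import version

installed = tuple(int(part) for part in version("faster-whisper").split("."))
assert installed >= (0, 10, 0), f"faster-whisper {installed} predates large-v3 support"
```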
src/whisper/fasterWhisperContainer.py CHANGED

```diff
@@ -55,10 +55,6 @@ class FasterWhisperContainer(AbstractWhisperContainer):
         device = "auto"
 
         model = WhisperModel(model_url, device=device, compute_type=self.compute_type)
-        if "large-v3" in model_url:
-            # Working with Whisper-large-v3
-            # https://github.com/guillaumekln/faster-whisper/issues/547#issuecomment-1797962599
-            model.feature_extractor.mel_filters = model.feature_extractor.get_mel_filters(model.feature_extractor.sampling_rate, model.feature_extractor.n_fft, n_mels=128)
         return model
 
     def create_callback(self, language: str = None, task: str = None,
```
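The deleted block was the stop-gap from the linked issue (guillaumekln/faster-whisper#547): large-v3 uses 128 mel bins where earlier Whisper models use 80, and pre-0.10.0 releases hard-coded 80, so the container rebuilt `mel_filters` by hand. A quick check that the new release configures this itself, assuming `feature_extractor` is still a public attribute and the model can be downloaded:

```python
# Sketch: with faster-whisper >= 0.10.0, the large-v3 feature extractor
# should already be built for 128 mel bins, making the removed patch redundant.
from faster_whisper import WhisperModel

model = WhisperModel("large-v3", device="cpu", compute_type="int8")
# mel_filters has shape (n_mels, n_fft // 2 + 1); expect n_mels == 128 here.
print(model.feature_extractor.mel_filters.shape[0])
```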