avans06 commited on
Commit
28514b1
1 Parent(s): 77b92a2

Update the dependency package faster-whisper to version 0.10.0

Browse files

faster-whisper now officially supports the large-v3 model, so the large-v3 model URL in the config has been updated to the official version.

app.py CHANGED
@@ -137,7 +137,7 @@ class WhisperTranscriber:
137
  vadOptions = VadOptions(vad, vadMergeWindow, vadMaxMergeSize, self.app_config.vad_padding, self.app_config.vad_prompt_window, self.app_config.vad_initial_prompt_mode)
138
 
139
  if diarization:
140
- if diarization_speakers < 1:
141
  self.set_diarization(auth_token=self.app_config.auth_token, min_speakers=diarization_min_speakers, max_speakers=diarization_max_speakers)
142
  else:
143
  self.set_diarization(auth_token=self.app_config.auth_token, num_speakers=diarization_speakers, min_speakers=diarization_min_speakers, max_speakers=diarization_max_speakers)
@@ -189,7 +189,7 @@ class WhisperTranscriber:
189
 
190
  # Set diarization
191
  if diarization:
192
- if diarization_speakers < 1:
193
  self.set_diarization(auth_token=self.app_config.auth_token, min_speakers=diarization_min_speakers, max_speakers=diarization_max_speakers)
194
  else:
195
  self.set_diarization(auth_token=self.app_config.auth_token, num_speakers=diarization_speakers, min_speakers=diarization_min_speakers, max_speakers=diarization_max_speakers)
@@ -209,7 +209,8 @@ class WhisperTranscriber:
209
  try:
210
  progress(0, desc="init audio sources")
211
  sources = self.__get_source(urlData, multipleFiles, microphoneData)
212
-
 
213
  try:
214
  progress(0, desc="init whisper model")
215
  whisper_lang = get_language_from_name(languageName)
@@ -361,6 +362,11 @@ class WhisperTranscriber:
361
 
362
  except ExceededMaximumDuration as e:
363
  return [], ("[ERROR]: Maximum remote video length is " + str(e.maxDuration) + "s, file was " + str(e.videoDuration) + "s"), "[ERROR]"
 
 
 
 
 
364
 
365
  def transcribe_file(self, model: AbstractWhisperContainer, audio_path: str, language: str, task: str = None,
366
  vadOptions: VadOptions = VadOptions(),
 
137
  vadOptions = VadOptions(vad, vadMergeWindow, vadMaxMergeSize, self.app_config.vad_padding, self.app_config.vad_prompt_window, self.app_config.vad_initial_prompt_mode)
138
 
139
  if diarization:
140
+ if diarization_speakers is not None and diarization_speakers < 1:
141
  self.set_diarization(auth_token=self.app_config.auth_token, min_speakers=diarization_min_speakers, max_speakers=diarization_max_speakers)
142
  else:
143
  self.set_diarization(auth_token=self.app_config.auth_token, num_speakers=diarization_speakers, min_speakers=diarization_min_speakers, max_speakers=diarization_max_speakers)
 
189
 
190
  # Set diarization
191
  if diarization:
192
+ if diarization_speakers is not None and diarization_speakers < 1:
193
  self.set_diarization(auth_token=self.app_config.auth_token, min_speakers=diarization_min_speakers, max_speakers=diarization_max_speakers)
194
  else:
195
  self.set_diarization(auth_token=self.app_config.auth_token, num_speakers=diarization_speakers, min_speakers=diarization_min_speakers, max_speakers=diarization_max_speakers)
 
209
  try:
210
  progress(0, desc="init audio sources")
211
  sources = self.__get_source(urlData, multipleFiles, microphoneData)
212
+ if (len(sources) == 0):
213
+ raise Exception("init audio sources failed...")
214
  try:
215
  progress(0, desc="init whisper model")
216
  whisper_lang = get_language_from_name(languageName)
 
362
 
363
  except ExceededMaximumDuration as e:
364
  return [], ("[ERROR]: Maximum remote video length is " + str(e.maxDuration) + "s, file was " + str(e.videoDuration) + "s"), "[ERROR]"
365
+ except Exception as e:
366
+ import traceback
367
+ print(traceback.format_exc())
368
+ return [], ("Error occurred during transcribe: " + str(e)), ""
369
+
370
 
371
  def transcribe_file(self, model: AbstractWhisperContainer, audio_path: str, language: str, task: str = None,
372
  vadOptions: VadOptions = VadOptions(),
config.json5 CHANGED
@@ -28,8 +28,7 @@
28
  },
29
  {
30
  "name": "large-v3",
31
- "url": "avans06/faster-whisper-large-v3",
32
- "type": "huggingface"
33
  },
34
  // Uncomment to add custom Japanese models
35
  //{
 
28
  },
29
  {
30
  "name": "large-v3",
31
+ "url": "large-v3"
 
32
  },
33
  // Uncomment to add custom Japanese models
34
  //{
requirements-fasterWhisper.txt CHANGED
@@ -1,6 +1,6 @@
1
  git+https://github.com/huggingface/transformers
2
  ctranslate2>=3.21.0
3
- faster-whisper
4
  ffmpeg-python==0.2.0
5
  gradio==3.50.2
6
  yt-dlp
 
1
  git+https://github.com/huggingface/transformers
2
  ctranslate2>=3.21.0
3
+ faster-whisper>=0.10.0
4
  ffmpeg-python==0.2.0
5
  gradio==3.50.2
6
  yt-dlp
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
  git+https://github.com/huggingface/transformers
2
  ctranslate2>=3.21.0
3
- faster-whisper
4
  ffmpeg-python==0.2.0
5
  gradio==3.50.2
6
  yt-dlp
 
1
  git+https://github.com/huggingface/transformers
2
  ctranslate2>=3.21.0
3
+ faster-whisper>=0.10.0
4
  ffmpeg-python==0.2.0
5
  gradio==3.50.2
6
  yt-dlp
src/whisper/fasterWhisperContainer.py CHANGED
@@ -55,10 +55,6 @@ class FasterWhisperContainer(AbstractWhisperContainer):
55
  device = "auto"
56
 
57
  model = WhisperModel(model_url, device=device, compute_type=self.compute_type)
58
- if "large-v3" in model_url:
59
- # Working with Whisper-large-v3
60
- # https://github.com/guillaumekln/faster-whisper/issues/547#issuecomment-1797962599
61
- model.feature_extractor.mel_filters = model.feature_extractor.get_mel_filters(model.feature_extractor.sampling_rate, model.feature_extractor.n_fft, n_mels=128)
62
  return model
63
 
64
  def create_callback(self, language: str = None, task: str = None,
 
55
  device = "auto"
56
 
57
  model = WhisperModel(model_url, device=device, compute_type=self.compute_type)
 
 
 
 
58
  return model
59
 
60
  def create_callback(self, language: str = None, task: str = None,