aadnk committed on
Commit
751fc8f
1 Parent(s): adca588

Recommend faster-whisper for GPU inference

Browse files
Files changed (1) hide show
  1. app.py +17 -2
app.py CHANGED
@@ -399,16 +399,31 @@ def create_ui(app_config: ApplicationConfig):
399
  ui.set_parallel_devices(app_config.vad_parallel_devices)
400
  ui.set_auto_parallel(app_config.auto_parallel)
401
 
402
- ui_description = "Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse "
 
 
 
 
 
 
 
 
 
 
 
403
  ui_description += " audio and is also a multi-task model that can perform multilingual speech recognition "
404
  ui_description += " as well as speech translation and language identification. "
405
 
406
  ui_description += "\n\n\n\nFor longer audio files (>10 minutes) not in English, it is recommended that you select Silero VAD (Voice Activity Detector) in the VAD option."
407
 
 
 
 
 
408
  if app_config.input_audio_max_duration > 0:
409
  ui_description += "\n\n" + "Max audio file length: " + str(app_config.input_audio_max_duration) + " s"
410
 
411
- ui_article = "Read the [documentation here](https://gitlab.com/aadnk/whisper-webui/-/blob/main/docs/options.md)"
412
 
413
  whisper_models = app_config.get_model_names()
414
 
 
399
  ui.set_parallel_devices(app_config.vad_parallel_devices)
400
  ui.set_auto_parallel(app_config.auto_parallel)
401
 
402
+ is_whisper = False
403
+
404
+ if app_config.whisper_implementation == "whisper":
405
+ implementation_name = "Whisper"
406
+ is_whisper = True
407
+ elif app_config.whisper_implementation in ["faster-whisper", "faster_whisper"]:
408
+ implementation_name = "Faster Whisper"
409
+ else:
410
+ # Try to convert from snake-case/kebab-case to title-case
411
+ implementation_name = app_config.whisper_implementation.title().replace("_", " ").replace("-", " ")
412
+
413
+ ui_description = implementation_name + " is a general-purpose speech recognition model. It is trained on a large dataset of diverse "
414
  ui_description += " audio and is also a multi-task model that can perform multilingual speech recognition "
415
  ui_description += " as well as speech translation and language identification. "
416
 
417
  ui_description += "\n\n\n\nFor longer audio files (>10 minutes) not in English, it is recommended that you select Silero VAD (Voice Activity Detector) in the VAD option."
418
 
419
+ # Recommend faster-whisper
420
+ if is_whisper:
421
+ ui_description += "\n\n\n\nFor faster inference on GPU, try [faster-whisper](https://huggingface.co/spaces/aadnk/faster-whisper-webui)."
422
+
423
  if app_config.input_audio_max_duration > 0:
424
  ui_description += "\n\n" + "Max audio file length: " + str(app_config.input_audio_max_duration) + " s"
425
 
426
+ ui_article = "Read the [documentation here](https://gitlab.com/aadnk/whisper-webui/-/blob/main/docs/options.md)."
427
 
428
  whisper_models = app_config.get_model_names()
429