aadnk commited on
Commit
de77829
1 Parent(s): 7fd072f

Add large-v1 and large-v2 to CLIs

Browse files
Files changed (2) hide show
  1. app.py +4 -2
  2. cli.py +2 -2
app.py CHANGED
@@ -57,6 +57,8 @@ LANGUAGES = [
57
  "Hausa", "Bashkir", "Javanese", "Sundanese"
58
  ]
59
 
 
 
60
  class WhisperTranscriber:
61
  def __init__(self, input_audio_max_duration: float = DEFAULT_INPUT_AUDIO_MAX_DURATION, vad_process_timeout: float = None, vad_cpu_cores: int = 1, delete_uploaded_files: bool = DELETE_UPLOADED_FILES):
62
  self.model_cache = ModelCache()
@@ -349,7 +351,7 @@ def create_ui(input_audio_max_duration, share=False, server_name: str = None, se
349
  ui_article = "Read the [documentation here](https://huggingface.co/spaces/aadnk/whisper-webui/blob/main/docs/options.md)"
350
 
351
  demo = gr.Interface(fn=ui.transcribe_webui, description=ui_description, article=ui_article, inputs=[
352
- gr.Dropdown(choices=["tiny", "base", "small", "medium", "large", "large-v1", "large-v2"], value=default_model_name, label="Model"),
353
  gr.Dropdown(choices=sorted(LANGUAGES), label="Language"),
354
  gr.Text(label="URL (YouTube, etc.)"),
355
  gr.File(label="Upload Files", file_count="multiple"),
@@ -377,7 +379,7 @@ if __name__ == '__main__':
377
  parser.add_argument("--share", type=bool, default=False, help="True to share the app on HuggingFace.")
378
  parser.add_argument("--server_name", type=str, default=None, help="The host or IP to bind to. If None, bind to localhost.")
379
  parser.add_argument("--server_port", type=int, default=7860, help="The port to bind to.")
380
- parser.add_argument("--default_model_name", type=str, default="medium", help="The default model name.")
381
  parser.add_argument("--default_vad", type=str, default="silero-vad", help="The default VAD.")
382
  parser.add_argument("--vad_parallel_devices", type=str, default="", help="A comma-delimited list of CUDA devices to use for parallel processing. If None, disable parallel processing.")
383
  parser.add_argument("--vad_cpu_cores", type=int, default=1, help="The number of CPU cores to use for VAD pre-processing.")
 
57
  "Hausa", "Bashkir", "Javanese", "Sundanese"
58
  ]
59
 
60
+ WHISPER_MODELS = ["tiny", "base", "small", "medium", "large", "large-v1", "large-v2"]
61
+
62
  class WhisperTranscriber:
63
  def __init__(self, input_audio_max_duration: float = DEFAULT_INPUT_AUDIO_MAX_DURATION, vad_process_timeout: float = None, vad_cpu_cores: int = 1, delete_uploaded_files: bool = DELETE_UPLOADED_FILES):
64
  self.model_cache = ModelCache()
 
351
  ui_article = "Read the [documentation here](https://huggingface.co/spaces/aadnk/whisper-webui/blob/main/docs/options.md)"
352
 
353
  demo = gr.Interface(fn=ui.transcribe_webui, description=ui_description, article=ui_article, inputs=[
354
+ gr.Dropdown(choices=WHISPER_MODELS, value=default_model_name, label="Model"),
355
  gr.Dropdown(choices=sorted(LANGUAGES), label="Language"),
356
  gr.Text(label="URL (YouTube, etc.)"),
357
  gr.File(label="Upload Files", file_count="multiple"),
 
379
  parser.add_argument("--share", type=bool, default=False, help="True to share the app on HuggingFace.")
380
  parser.add_argument("--server_name", type=str, default=None, help="The host or IP to bind to. If None, bind to localhost.")
381
  parser.add_argument("--server_port", type=int, default=7860, help="The port to bind to.")
382
+ parser.add_argument("--default_model_name", type=str, choices=WHISPER_MODELS, default="medium", help="The default model name.")
383
  parser.add_argument("--default_vad", type=str, default="silero-vad", help="The default VAD.")
384
  parser.add_argument("--vad_parallel_devices", type=str, default="", help="A comma-delimited list of CUDA devices to use for parallel processing. If None, disable parallel processing.")
385
  parser.add_argument("--vad_cpu_cores", type=int, default=1, help="The number of CPU cores to use for VAD pre-processing.")
cli.py CHANGED
@@ -6,7 +6,7 @@ import warnings
6
  import numpy as np
7
 
8
  import torch
9
- from app import LANGUAGES, WhisperTranscriber
10
  from src.download import download_url
11
 
12
  from src.utils import optional_float, optional_int, str2bool
@@ -15,7 +15,7 @@ from src.whisperContainer import WhisperContainer
15
  def cli():
16
  parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
17
  parser.add_argument("audio", nargs="+", type=str, help="audio file(s) to transcribe")
18
- parser.add_argument("--model", default="small", choices=["tiny", "base", "small", "medium", "large"], help="name of the Whisper model to use")
19
  parser.add_argument("--model_dir", type=str, default=None, help="the path to save model files; uses ~/.cache/whisper by default")
20
  parser.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu", help="device to use for PyTorch inference")
21
  parser.add_argument("--output_dir", "-o", type=str, default=".", help="directory to save the outputs")
 
6
  import numpy as np
7
 
8
  import torch
9
+ from app import LANGUAGES, WHISPER_MODELS, WhisperTranscriber
10
  from src.download import download_url
11
 
12
  from src.utils import optional_float, optional_int, str2bool
 
15
  def cli():
16
  parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
17
  parser.add_argument("audio", nargs="+", type=str, help="audio file(s) to transcribe")
18
+ parser.add_argument("--model", default="small", choices=WHISPER_MODELS, help="name of the Whisper model to use")
19
  parser.add_argument("--model_dir", type=str, default=None, help="the path to save model files; uses ~/.cache/whisper by default")
20
  parser.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu", help="device to use for PyTorch inference")
21
  parser.add_argument("--output_dir", "-o", type=str, default=".", help="directory to save the outputs")