aadnk committed
Commit
2698c96
1 Parent(s): b5af58b

Fix WHISPER_IMPLEMENTATION argument

app.py CHANGED
@@ -125,7 +125,7 @@ class WhisperTranscriber:
         selectedLanguage = languageName.lower() if len(languageName) > 0 else None
         selectedModel = modelName if modelName is not None else "base"
 
-        model = create_whisper_container(whisper_implementation=app_config.whisper_implementation,
+        model = create_whisper_container(whisper_implementation=self.app_config.whisper_implementation,
                                          model_name=selectedModel, cache=self.model_cache, models=self.app_config.models)
 
         # Result
@@ -485,38 +485,43 @@ def create_ui(app_config: ApplicationConfig):
     ui.close()
 
 if __name__ == '__main__':
-    app_config = ApplicationConfig.create_default()
-    whisper_models = app_config.get_model_names()
+    default_app_config = ApplicationConfig.create_default()
+    whisper_models = default_app_config.get_model_names()
+
+    # Environment variable overrides
+    default_whisper_implementation = os.environ.get("WHISPER_IMPLEMENTATION", default_app_config.whisper_implementation)
 
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    parser.add_argument("--input_audio_max_duration", type=int, default=app_config.input_audio_max_duration, \
+    parser.add_argument("--input_audio_max_duration", type=int, default=default_app_config.input_audio_max_duration, \
                         help="Maximum audio file length in seconds, or -1 for no limit.") # 600
-    parser.add_argument("--share", type=bool, default=app_config.share, \
+    parser.add_argument("--share", type=bool, default=default_app_config.share, \
                         help="True to share the app on HuggingFace.") # False
-    parser.add_argument("--server_name", type=str, default=app_config.server_name, \
+    parser.add_argument("--server_name", type=str, default=default_app_config.server_name, \
                         help="The host or IP to bind to. If None, bind to localhost.") # None
-    parser.add_argument("--server_port", type=int, default=app_config.server_port, \
+    parser.add_argument("--server_port", type=int, default=default_app_config.server_port, \
                         help="The port to bind to.") # 7860
-    parser.add_argument("--queue_concurrency_count", type=int, default=app_config.queue_concurrency_count, \
+    parser.add_argument("--queue_concurrency_count", type=int, default=default_app_config.queue_concurrency_count, \
                         help="The number of concurrent requests to process.") # 1
-    parser.add_argument("--default_model_name", type=str, choices=whisper_models, default=app_config.default_model_name, \
+    parser.add_argument("--default_model_name", type=str, choices=whisper_models, default=default_app_config.default_model_name, \
                         help="The default model name.") # medium
-    parser.add_argument("--default_vad", type=str, default=app_config.default_vad, \
+    parser.add_argument("--default_vad", type=str, default=default_app_config.default_vad, \
                         help="The default VAD.") # silero-vad
-    parser.add_argument("--vad_parallel_devices", type=str, default=app_config.vad_parallel_devices, \
+    parser.add_argument("--vad_parallel_devices", type=str, default=default_app_config.vad_parallel_devices, \
                         help="A comma-delimited list of CUDA devices to use for parallel processing. If None, disable parallel processing.") # ""
-    parser.add_argument("--vad_cpu_cores", type=int, default=app_config.vad_cpu_cores, \
+    parser.add_argument("--vad_cpu_cores", type=int, default=default_app_config.vad_cpu_cores, \
                         help="The number of CPU cores to use for VAD pre-processing.") # 1
-    parser.add_argument("--vad_process_timeout", type=float, default=app_config.vad_process_timeout, \
+    parser.add_argument("--vad_process_timeout", type=float, default=default_app_config.vad_process_timeout, \
                         help="The number of seconds before inactive processes are terminated. Use 0 to close processes immediately, or None for no timeout.") # 1800
-    parser.add_argument("--auto_parallel", type=bool, default=app_config.auto_parallel, \
+    parser.add_argument("--auto_parallel", type=bool, default=default_app_config.auto_parallel, \
                         help="True to use all available GPUs and CPU cores for processing. Use vad_cpu_cores/vad_parallel_devices to specify the number of CPU cores/GPUs to use.") # False
-    parser.add_argument("--output_dir", "-o", type=str, default=app_config.output_dir, \
-                        help="directory to save the outputs"), \
-    parser.add_argument("--whisper_implementation", type=str, default=app_config.whisper_implementation, choices=["whisper", "faster-whisper"],\
-                        help="the Whisper implementation to use"), \
+    parser.add_argument("--output_dir", "-o", type=str, default=default_app_config.output_dir, \
+                        help="directory to save the outputs")
+    parser.add_argument("--whisper_implementation", type=str, default=default_whisper_implementation, choices=["whisper", "faster-whisper"],\
+                        help="the Whisper implementation to use")
 
     args = parser.parse_args().__dict__
 
-    updated_config = app_config.update(**args)
+    updated_config = default_app_config.update(**args)
+
+    print(f"Using {updated_config.whisper_implementation} for Whisper")
     create_ui(app_config=updated_config)
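
The substance of the app.py change: the first hunk fixes the reference to self.app_config, and in __main__ the WHISPER_IMPLEMENTATION environment variable is now read before the parser is built, so it supplies the default for --whisper_implementation. The stray trailing "), \" after the last two add_argument calls, which chained them into one expression, is also removed. A minimal, runnable sketch of the resulting precedence (flag over environment variable over config default); config_default here is a stand-in for the app's ApplicationConfig value, not code from the commit:

    import argparse
    import os

    config_default = "whisper"  # stand-in for app_config.whisper_implementation
    env_or_config = os.environ.get("WHISPER_IMPLEMENTATION", config_default)

    parser = argparse.ArgumentParser()
    parser.add_argument("--whisper_implementation", type=str, default=env_or_config,
                        choices=["whisper", "faster-whisper"])

    # No flag: the environment variable (if set) or the config default applies.
    print(parser.parse_args([]).whisper_implementation)
    # An explicit flag always wins over the environment variable.
    print(parser.parse_args(["--whisper_implementation", "faster-whisper"]).whisper_implementation)
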
cli.py CHANGED
@@ -20,6 +20,9 @@ def cli():
     # For the CLI, we fall back to saving the output to the current directory
     output_dir = app_config.output_dir if app_config.output_dir is not None else "."
 
+    # Environment variable overrides
+    default_whisper_implementation = os.environ.get("WHISPER_IMPLEMENTATION", app_config.whisper_implementation)
+
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument("audio", nargs="+", type=str, \
                         help="audio file(s) to transcribe")
@@ -32,9 +35,9 @@ def cli():
     parser.add_argument("--output_dir", "-o", type=str, default=output_dir, \
                         help="directory to save the outputs")
     parser.add_argument("--verbose", type=str2bool, default=app_config.verbose, \
-                        help="whether to print out the progress and debug messages"), \
-    parser.add_argument("--whisper_implementation", type=str, default=app_config.whisper_implementation, choices=["whisper", "faster-whisper"],\
-                        help="the Whisper implementation to use"), \
+                        help="whether to print out the progress and debug messages")
+    parser.add_argument("--whisper_implementation", type=str, default=default_whisper_implementation, choices=["whisper", "faster-whisper"],\
+                        help="the Whisper implementation to use")
 
     parser.add_argument("--task", type=str, default=app_config.task, choices=["transcribe", "translate"], \
                         help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')")
@@ -95,6 +98,7 @@ def cli():
     os.makedirs(output_dir, exist_ok=True)
 
     whisper_implementation = args.pop("whisper_implementation")
+    print(f"Using {whisper_implementation} for Whisper")
 
     if model_name.endswith(".en") and args["language"] not in {"en", "English"}:
         warnings.warn(f"{model_name} is an English-only model but received '{args['language']}'; using English instead.")
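
With the same override in cli.py, the implementation can be selected per invocation without a flag, e.g. WHISPER_IMPLEMENTATION=faster-whisper python cli.py audio.mp3 (file name illustrative); an explicit --whisper_implementation argument still takes precedence, since argparse only falls back to the default when the flag is absent.
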
dockerfile CHANGED
@@ -1,13 +1,23 @@
+# docker build -t whisper-webui --build-arg WHISPER_IMPLEMENTATION=whisper .
+
 FROM huggingface/transformers-pytorch-gpu
 EXPOSE 7860
 
+ARG WHISPER_IMPLEMENTATION=whisper
+ENV WHISPER_IMPLEMENTATION=${WHISPER_IMPLEMENTATION}
+
 ADD . /opt/whisper-webui/
 
 # Latest version of transformers-pytorch-gpu seems to lack tk.
 # Further, pip install fails, so we must upgrade pip first.
 RUN apt-get -y install python3-tk
-RUN python3 -m pip install --upgrade pip &&\
-    python3 -m pip install -r /opt/whisper-webui/requirements.txt
+RUN python3 -m pip install --upgrade pip
+
+RUN if [ "${WHISPER_IMPLEMENTATION}" = "whisper" ]; then \
+    python3 -m pip install -r /opt/whisper-webui/requirements.txt; \
+    else \
+    python3 -m pip install -r /opt/whisper-webui/requirements-fasterWhisper.txt; \
+    fi
 
 # Note: Models will be downloaded on demand to the directory /root/.cache/whisper.
 # You can also bind this directory in the container to somewhere on the host.
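
Because WHISPER_IMPLEMENTATION is declared both as a build ARG and exported as a runtime ENV, the value chosen at build time (docker build --build-arg WHISPER_IMPLEMENTATION=faster-whisper ...) is also what app.py and cli.py see inside the container, and it can be overridden at container start with the standard docker run -e WHISPER_IMPLEMENTATION=... flag. The requirements split, however, happens only at build time, so overriding at run time assumes the image was built with the matching dependency set.
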
requirements-fastWhisper.txt → requirements-fasterWhisper.txt RENAMED
@@ -5,4 +5,5 @@ gradio==3.23.0
 yt-dlp
 json5
 torch
-torchaudio
+torchaudio
+more_itertools
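
The rename matters for the dockerfile change above, which installs the file under the exact name requirements-fasterWhisper.txt. The file also gains more_itertools; the -torchaudio/+torchaudio pair is most likely just the artifact of adding a trailing newline at end of file.
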
src/whisper/whisperFactory.py CHANGED
@@ -6,6 +6,8 @@ from src.whisper.abstractWhisperContainer import AbstractWhisperContainer
 def create_whisper_container(whisper_implementation: str,
                              model_name: str, device: str = None, download_root: str = None,
                              cache: modelCache = None, models: List[ModelConfig] = []) -> AbstractWhisperContainer:
+    print("Creating whisper container for " + whisper_implementation)
+
     if (whisper_implementation == "whisper"):
         from src.whisper.whisperContainer import WhisperContainer
         return WhisperContainer(model_name, device, download_root, cache, models)
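
Only the "whisper" branch is visible in this hunk. The file uses a lazy-import factory: each backend module is imported inside the branch that selects it, so the dependencies of the unselected implementation are never loaded. A self-contained illustration of that pattern with stand-in standard-library modules (none of these names are from the repo):

    def create_backend(implementation: str):
        print("Creating backend for " + implementation)

        # Import lazily, inside the branch, so the other backend's
        # dependencies are never touched.
        if implementation == "json":
            import json   # stand-in for the "whisper" backend import
            return json
        elif implementation == "csv":
            import csv    # stand-in for the "faster-whisper" backend import
            return csv
        raise ValueError("Unknown implementation: " + implementation)

    print(create_backend("json").__name__)  # -> json
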