aadnk committed
Commit 20f75ae
1 Parent(s): c4e4d19

Add an option for automatically configuring parallel execution

Files changed (3)
  1. app.py +16 -1
  2. cli.py +3 -0
  3. src/vadParallel.py +4 -0
app.py CHANGED
@@ -6,6 +6,8 @@ from io import StringIO
 import os
 import pathlib
 import tempfile
+
+import torch
 from src.modelCache import ModelCache
 from src.vadParallel import ParallelContext, ParallelTranscription
 
@@ -29,6 +31,9 @@ DELETE_UPLOADED_FILES = True
 # Gradio seems to truncate files without keeping the extension, so we need to truncate the file prefix ourselves
 MAX_FILE_PREFIX_LENGTH = 17
 
+# Limit auto_parallel to a certain number of CPUs (specify vad_cpu_cores to get a higher number)
+MAX_AUTO_CPU_CORES = 8
+
 LANGUAGES = [
     "English", "Chinese", "German", "Spanish", "Russian", "Korean",
     "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan",
@@ -65,6 +70,14 @@ class WhisperTranscriber:
     def set_parallel_devices(self, vad_parallel_devices: str):
         self.parallel_device_list = [ device.strip() for device in vad_parallel_devices.split(",") ] if vad_parallel_devices else None
 
+    def set_auto_parallel(self, auto_parallel: bool):
+        if auto_parallel:
+            if torch.cuda.is_available():
+                self.parallel_device_list = [ str(gpu_id) for gpu_id in range(torch.cuda.device_count())]
+
+            self.vad_cpu_cores = min(os.cpu_count(), MAX_AUTO_CPU_CORES)
+            print("[Auto parallel] Using GPU devices " + str(self.parallel_device_list) + " and " + str(self.vad_cpu_cores) + " CPU cores for VAD/transcription.")
+
     def transcribe_webui(self, modelName, languageName, urlData, uploadFile, microphoneData, task, vad, vadMergeWindow, vadMaxMergeSize, vadPadding, vadPromptWindow):
         try:
             source, sourceName = self.__get_source(urlData, uploadFile, microphoneData)
@@ -268,11 +281,12 @@ class WhisperTranscriber:
 
 
 def create_ui(input_audio_max_duration, share=False, server_name: str = None, server_port: int = 7860,
-              default_model_name: str = "medium", default_vad: str = None, vad_parallel_devices: str = None, vad_process_timeout: float = None, vad_cpu_cores: int = 1):
+              default_model_name: str = "medium", default_vad: str = None, vad_parallel_devices: str = None, vad_process_timeout: float = None, vad_cpu_cores: int = 1, auto_parallel: bool = False):
     ui = WhisperTranscriber(input_audio_max_duration, vad_process_timeout, vad_cpu_cores)
 
     # Specify a list of devices to use for parallel processing
     ui.set_parallel_devices(vad_parallel_devices)
+    ui.set_auto_parallel(auto_parallel)
 
     ui_description = "Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse "
     ui_description += " audio and is also a multi-task model that can perform multilingual speech recognition "
@@ -319,6 +333,7 @@ if __name__ == '__main__':
     parser.add_argument("--vad_parallel_devices", type=str, default="", help="A comma delimited list of CUDA devices to use for parallel processing. If None, disable parallel processing.")
     parser.add_argument("--vad_cpu_cores", type=int, default=1, help="The number of CPU cores to use for VAD pre-processing.")
     parser.add_argument("--vad_process_timeout", type=float, default="1800", help="The number of seconds before inactive processes are terminated. Use 0 to close processes immediately, or None for no timeout.")
+    parser.add_argument("--auto_parallel", type=bool, default=False, help="True to use all available GPUs and CPU cores for processing. Use vad_cpu_cores/vad_parallel_devices to specify the number of CPU cores/GPUs to use.")
 
     args = parser.parse_args().__dict__
     create_ui(**args)
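For reference, the detection that the new set_auto_parallel performs boils down to two standard calls: torch.cuda.device_count() for GPUs and os.cpu_count() for CPU cores, the latter capped by the new MAX_AUTO_CPU_CORES constant. Below is a minimal standalone sketch of that logic, without the WhisperTranscriber plumbing; detect_parallel_config is a hypothetical helper name, not part of the codebase.

# Minimal sketch of the detection behind set_auto_parallel (hypothetical
# helper name; MAX_AUTO_CPU_CORES mirrors the new constant in app.py).
import os
import torch

MAX_AUTO_CPU_CORES = 8

def detect_parallel_config():
    # Enumerate CUDA devices as string IDs ("0", "1", ...) when available;
    # otherwise leave the device list as None (no GPU parallelism).
    device_list = None
    if torch.cuda.is_available():
        device_list = [str(gpu_id) for gpu_id in range(torch.cuda.device_count())]

    # Cap the auto-detected core count so auto mode does not saturate large
    # machines; vad_cpu_cores can still be passed explicitly to go higher.
    cpu_cores = min(os.cpu_count(), MAX_AUTO_CPU_CORES)
    return device_list, cpu_cores

if __name__ == "__main__":
    devices, cores = detect_parallel_config()
    print("GPU devices:", devices, "| CPU cores for VAD:", cores)

Note that on a machine without CUDA, only the CPU core count is raised; the parallel device list stays None, matching the committed method.
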
cli.py CHANGED
@@ -34,6 +34,7 @@ def cli():
     parser.add_argument("--vad_prompt_window", type=optional_float, default=3, help="The window size of the prompt to pass to Whisper")
     parser.add_argument("--vad_cpu_cores", type=int, default=1, help="The number of CPU cores to use for VAD pre-processing.")
     parser.add_argument("--vad_parallel_devices", type=str, default="", help="A comma delimited list of CUDA devices to use for parallel processing. If None, disable parallel processing.")
+    parser.add_argument("--auto_parallel", type=bool, default=False, help="True to use all available GPUs and CPU cores for processing. Use vad_cpu_cores/vad_parallel_devices to specify the number of CPU cores/GPUs to use.")
 
     parser.add_argument("--temperature", type=float, default=0, help="temperature to use for sampling")
     parser.add_argument("--best_of", type=optional_int, default=5, help="number of candidates when sampling with non-zero temperature")
@@ -75,10 +76,12 @@ def cli():
     vad_padding = args.pop("vad_padding")
     vad_prompt_window = args.pop("vad_prompt_window")
     vad_cpu_cores = args.pop("vad_cpu_cores")
+    auto_parallel = args.pop("auto_parallel")
 
     model = WhisperContainer(model_name, device=device, download_root=model_dir)
     transcriber = WhisperTranscriber(delete_uploaded_files=False, vad_cpu_cores=vad_cpu_cores)
     transcriber.set_parallel_devices(args.pop("vad_parallel_devices"))
+    transcriber.set_auto_parallel(auto_parallel)
 
     if (transcriber._has_parallel_devices()):
         print("Using parallel devices:", transcriber.parallel_device_list)
src/vadParallel.py CHANGED
@@ -176,6 +176,10 @@ class ParallelTranscription(AbstractTranscription):
         while (chunk_start < total_duration):
             chunk_end = min(chunk_start + chunk_size, total_duration)
 
+            if (chunk_end - chunk_start < 1):
+                # No need to process chunks that are less than 1 second
+                break
+
             print("Parallel VAD: Executing chunk from " + str(chunk_start) + " to " +
                   str(chunk_end) + " on CPU device " + str(cpu_device_id))
             parameters.append([audio, config, chunk_start, chunk_end]);
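The effect of the new guard is easiest to see with the splitting loop isolated. Here is a minimal sketch, assuming a fixed chunk_size in seconds; split_chunks is a hypothetical name and the durations are made up for illustration.

# Standalone sketch of the chunk-splitting loop with the new sub-second guard.
# split_chunks is a hypothetical name; durations are in seconds.
def split_chunks(total_duration: float, chunk_size: float):
    chunks = []
    chunk_start = 0.0
    while chunk_start < total_duration:
        chunk_end = min(chunk_start + chunk_size, total_duration)
        # The new guard: a trailing sliver shorter than 1 second is not worth
        # dispatching to a separate worker process, so the loop stops here.
        if chunk_end - chunk_start < 1:
            break
        chunks.append((chunk_start, chunk_end))
        chunk_start = chunk_end
    return chunks

# The final 0.5 s remainder is dropped rather than scheduled as its own chunk:
print(split_chunks(90.5, 30.0))  # [(0.0, 30.0), (30.0, 60.0), (60.0, 90.0)]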