avans06 committed on
Commit
92bc446
·
1 Parent(s): c78307c

Integration of progress bar with translation model compatibility

Browse files
app.py CHANGED
@@ -156,13 +156,15 @@ class WhisperTranscriber:
156
  word_timestamps=word_timestamps, prepend_punctuations=prepend_punctuations, append_punctuations=append_punctuations, highlight_words=highlight_words,
157
  progress=progress)
158
 
159
- def transcribe_webui(self, modelName, languageName, nllbModelName, nllbLangName, urlData, multipleFiles, microphoneData, task,
160
  vadOptions: VadOptions, progress: gr.Progress = None, highlight_words: bool = False,
161
  **decodeOptions: dict):
162
  try:
 
163
  sources = self.__get_source(urlData, multipleFiles, microphoneData)
164
 
165
  try:
 
166
  whisper_lang = get_language_from_name(languageName)
167
  selectedLanguage = languageName.lower() if languageName is not None and len(languageName) > 0 else None
168
  selectedModel = modelName if modelName is not None else "base"
@@ -170,13 +172,15 @@ class WhisperTranscriber:
170
  model = create_whisper_container(whisper_implementation=self.app_config.whisper_implementation,
171
  model_name=selectedModel, compute_type=self.app_config.compute_type,
172
  cache=self.model_cache, models=self.app_config.models)
173
-
 
174
  nllb_lang = get_nllb_lang_from_name(nllbLangName)
175
  selectedNllbModelName = nllbModelName if nllbModelName is not None and len(nllbModelName) > 0 else "nllb-200-distilled-600M/facebook"
176
  selectedNllbModel = next((modelConfig for modelConfig in self.app_config.nllb_models if modelConfig.name == selectedNllbModelName), None)
177
-
178
  nllb_model = NllbModel(model_config=selectedNllbModel, whisper_lang=whisper_lang, nllb_lang=nllb_lang) # load_model=True
179
-
 
180
  # Result
181
  download = []
182
  zip_file_lookup = {}
@@ -186,6 +190,7 @@ class WhisperTranscriber:
186
  # Write result
187
  downloadDirectory = tempfile.mkdtemp()
188
  source_index = 0
 
189
 
190
  outputDirectory = self.output_dir if self.output_dir is not None else downloadDirectory
191
 
@@ -195,9 +200,10 @@ class WhisperTranscriber:
195
 
196
  # A listener that will report progress to Gradio
197
  root_progress_listener = self._create_progress_listener(progress)
 
198
 
199
  # Execute whisper
200
- for source in sources:
201
  source_prefix = ""
202
  source_audio_duration = source.get_audio_duration()
203
 
@@ -208,9 +214,9 @@ class WhisperTranscriber:
208
  print("Transcribing ", source.source_path)
209
 
210
  scaled_progress_listener = SubTaskProgressListener(root_progress_listener,
211
- base_task_total=total_duration,
212
- sub_task_start=current_progress,
213
- sub_task_total=source_audio_duration)
214
 
215
  # Transcribe
216
  result = self.transcribe_file(model, source.source_path, selectedLanguage, task, vadOptions, scaled_progress_listener, **decodeOptions)
@@ -219,7 +225,7 @@ class WhisperTranscriber:
219
  # Update progress
220
  current_progress += source_audio_duration
221
 
222
- source_download, source_text, source_vtt = self.write_result(result, nllb_model, filePrefix, outputDirectory, highlight_words)
223
 
224
  if len(sources) > 1:
225
  # Add new line separators
@@ -377,9 +383,9 @@ class WhisperTranscriber:
377
  def __init__(self, progress: gr.Progress):
378
  self.progress = progress
379
 
380
- def on_progress(self, current: Union[int, float], total: Union[int, float]):
381
  # From 0 to 1
382
- self.progress(current / total)
383
 
384
  def on_finished(self):
385
  self.progress(1)
@@ -435,7 +441,7 @@ class WhisperTranscriber:
435
 
436
  return config
437
 
438
- def write_result(self, result: dict, nllb_model: NllbModel, source_name: str, output_dir: str, highlight_words: bool = False):
439
  if not os.path.exists(output_dir):
440
  os.makedirs(output_dir)
441
 
@@ -446,6 +452,10 @@ class WhisperTranscriber:
446
 
447
  if nllb_model.nllb_lang is not None:
448
  try:
 
 
 
 
449
  pbar = tqdm.tqdm(total=len(segments))
450
  perf_start_time = time.perf_counter()
451
  nllb_model.load_model()
@@ -456,9 +466,14 @@ class WhisperTranscriber:
456
  if nllb_model.nllb_lang is not None:
457
  segment["text"] = nllb_model.translation(seg_text)
458
  pbar.update(1)
 
459
 
460
  nllb_model.release_vram()
461
  perf_end_time = time.perf_counter()
 
 
 
 
462
  print("\n\nprocess segments took {} seconds.\n\n".format(perf_end_time - perf_start_time))
463
  except Exception as e:
464
  # Ignore error - it's just a cleanup
 
156
  word_timestamps=word_timestamps, prepend_punctuations=prepend_punctuations, append_punctuations=append_punctuations, highlight_words=highlight_words,
157
  progress=progress)
158
 
159
+ def transcribe_webui(self, modelName: str, languageName: str, nllbModelName: str, nllbLangName: str, urlData: str, multipleFiles, microphoneData: str, task: str,
160
  vadOptions: VadOptions, progress: gr.Progress = None, highlight_words: bool = False,
161
  **decodeOptions: dict):
162
  try:
163
+ progress(0, desc="init audio sources")
164
  sources = self.__get_source(urlData, multipleFiles, microphoneData)
165
 
166
  try:
167
+ progress(0, desc="init whisper model")
168
  whisper_lang = get_language_from_name(languageName)
169
  selectedLanguage = languageName.lower() if languageName is not None and len(languageName) > 0 else None
170
  selectedModel = modelName if modelName is not None else "base"
 
172
  model = create_whisper_container(whisper_implementation=self.app_config.whisper_implementation,
173
  model_name=selectedModel, compute_type=self.app_config.compute_type,
174
  cache=self.model_cache, models=self.app_config.models)
175
+
176
+ progress(0, desc="init translate model")
177
  nllb_lang = get_nllb_lang_from_name(nllbLangName)
178
  selectedNllbModelName = nllbModelName if nllbModelName is not None and len(nllbModelName) > 0 else "nllb-200-distilled-600M/facebook"
179
  selectedNllbModel = next((modelConfig for modelConfig in self.app_config.nllb_models if modelConfig.name == selectedNllbModelName), None)
180
+
181
  nllb_model = NllbModel(model_config=selectedNllbModel, whisper_lang=whisper_lang, nllb_lang=nllb_lang) # load_model=True
182
+
183
+ progress(0, desc="init transcribe")
184
  # Result
185
  download = []
186
  zip_file_lookup = {}
 
190
  # Write result
191
  downloadDirectory = tempfile.mkdtemp()
192
  source_index = 0
193
+ extra_tasks_count = 1 if nllb_lang is not None else 0
194
 
195
  outputDirectory = self.output_dir if self.output_dir is not None else downloadDirectory
196
 
 
200
 
201
  # A listener that will report progress to Gradio
202
  root_progress_listener = self._create_progress_listener(progress)
203
+ sub_task_total = 1/(len(sources)+extra_tasks_count*len(sources))
204
 
205
  # Execute whisper
206
+ for idx, source in enumerate(sources):
207
  source_prefix = ""
208
  source_audio_duration = source.get_audio_duration()
209
 
 
214
  print("Transcribing ", source.source_path)
215
 
216
  scaled_progress_listener = SubTaskProgressListener(root_progress_listener,
217
+ base_task_total=1,
218
+ sub_task_start=idx*1/len(sources),
219
+ sub_task_total=sub_task_total)
220
 
221
  # Transcribe
222
  result = self.transcribe_file(model, source.source_path, selectedLanguage, task, vadOptions, scaled_progress_listener, **decodeOptions)
 
225
  # Update progress
226
  current_progress += source_audio_duration
227
 
228
+ source_download, source_text, source_vtt = self.write_result(result, nllb_model, filePrefix, outputDirectory, highlight_words, scaled_progress_listener)
229
 
230
  if len(sources) > 1:
231
  # Add new line separators
 
383
  def __init__(self, progress: gr.Progress):
384
  self.progress = progress
385
 
386
+ def on_progress(self, current: Union[int, float], total: Union[int, float], desc: str = None):
387
  # From 0 to 1
388
+ self.progress(current / total, desc=desc)
389
 
390
  def on_finished(self):
391
  self.progress(1)
 
441
 
442
  return config
443
 
444
+ def write_result(self, result: dict, nllb_model: NllbModel, source_name: str, output_dir: str, highlight_words: bool = False, progressListener: ProgressListener = None):
445
  if not os.path.exists(output_dir):
446
  os.makedirs(output_dir)
447
 
 
452
 
453
  if nllb_model.nllb_lang is not None:
454
  try:
455
+ segments_progress_listener = SubTaskProgressListener(progressListener,
456
+ base_task_total=progressListener.sub_task_total,
457
+ sub_task_start=1,
458
+ sub_task_total=1)
459
  pbar = tqdm.tqdm(total=len(segments))
460
  perf_start_time = time.perf_counter()
461
  nllb_model.load_model()
 
466
  if nllb_model.nllb_lang is not None:
467
  segment["text"] = nllb_model.translation(seg_text)
468
  pbar.update(1)
469
+ segments_progress_listener.on_progress(idx+1, len(segments), "Process segments")
470
 
471
  nllb_model.release_vram()
472
  perf_end_time = time.perf_counter()
473
+ # Call the finished callback
474
+ if segments_progress_listener is not None:
475
+ segments_progress_listener.on_finished()
476
+
477
  print("\n\nprocess segments took {} seconds.\n\n".format(perf_end_time - perf_start_time))
478
  except Exception as e:
479
  # Ignore error - it's just a cleanup
src/hooks/progressListener.py CHANGED
@@ -1,7 +1,7 @@
1
  from typing import Union
2
 
3
  class ProgressListener:
4
- def on_progress(self, current: Union[int, float], total: Union[int, float]):
5
  self.total = total
6
 
7
  def on_finished(self):
 
1
  from typing import Union
2
 
3
  class ProgressListener:
4
+ def on_progress(self, current: Union[int, float], total: Union[int, float], desc: str = None):
5
  self.total = total
6
 
7
  def on_finished(self):
src/hooks/subTaskProgressListener.py CHANGED
@@ -25,13 +25,13 @@ class SubTaskProgressListener(ProgressListener):
25
  ):
26
  self.base_task_listener = base_task_listener
27
  self.base_task_total = base_task_total
28
- self.sub_task_start = sub_task_start
29
- self.sub_task_total = sub_task_total
30
 
31
- def on_progress(self, current: Union[int, float], total: Union[int, float]):
32
  sub_task_progress_frac = current / total
33
  sub_task_progress = self.sub_task_start + self.sub_task_total * sub_task_progress_frac
34
- self.base_task_listener.on_progress(sub_task_progress, self.base_task_total)
35
 
36
  def on_finished(self):
37
  self.base_task_listener.on_progress(self.sub_task_start + self.sub_task_total, self.base_task_total)
 
25
  ):
26
  self.base_task_listener = base_task_listener
27
  self.base_task_total = base_task_total
28
+ self.sub_task_start = base_task_total*sub_task_start
29
+ self.sub_task_total = base_task_total*sub_task_total
30
 
31
+ def on_progress(self, current: Union[int, float], total: Union[int, float], desc: str = None):
32
  sub_task_progress_frac = current / total
33
  sub_task_progress = self.sub_task_start + self.sub_task_total * sub_task_progress_frac
34
+ self.base_task_listener.on_progress(sub_task_progress, self.base_task_total, desc=desc)
35
 
36
  def on_finished(self):
37
  self.base_task_listener.on_progress(self.sub_task_start + self.sub_task_total, self.base_task_total)
src/nllb/nllbModel.py CHANGED
@@ -54,6 +54,9 @@ class NllbModel:
54
  self.nllb_lang = nllb_lang
55
  self.model_config = model_config
56
 
 
 
 
57
  if os.path.isdir(model_config.url):
58
  self.model_path = model_config.url
59
  else:
 
54
  self.nllb_lang = nllb_lang
55
  self.model_config = model_config
56
 
57
+ if nllb_lang is None:
58
+ return
59
+
60
  if os.path.isdir(model_config.url):
61
  self.model_path = model_config.url
62
  else:
src/vad.py CHANGED
@@ -181,9 +181,10 @@ class AbstractTranscription(ABC):
181
  # Calculate progress
182
  progress_start_offset = merged[0]['start'] if len(merged) > 0 else 0
183
  progress_total_duration = sum([segment['end'] - segment['start'] for segment in merged])
 
184
 
185
  # For each time segment, run whisper
186
- for segment in merged:
187
  segment_index += 1
188
  segment_start = segment['start']
189
  segment_end = segment['end']
@@ -208,8 +209,10 @@ class AbstractTranscription(ABC):
208
 
209
  perf_start_time = time.perf_counter()
210
 
211
- scaled_progress_listener = SubTaskProgressListener(progressListener, base_task_total=progress_total_duration,
212
- sub_task_start=segment_start - progress_start_offset, sub_task_total=segment_duration)
 
 
213
  segment_result = whisperCallable.invoke(segment_audio, segment_index, segment_prompt, detected_language, progress_listener=scaled_progress_listener)
214
 
215
  perf_end_time = time.perf_counter()
 
181
  # Calculate progress
182
  progress_start_offset = merged[0]['start'] if len(merged) > 0 else 0
183
  progress_total_duration = sum([segment['end'] - segment['start'] for segment in merged])
184
+ sub_task_total = 1/len(merged)
185
 
186
  # For each time segment, run whisper
187
+ for idx, segment in enumerate(merged):
188
  segment_index += 1
189
  segment_start = segment['start']
190
  segment_end = segment['end']
 
209
 
210
  perf_start_time = time.perf_counter()
211
 
212
+ scaled_progress_listener = SubTaskProgressListener(progressListener,
213
+ base_task_total=progressListener.sub_task_total if isinstance(progressListener, SubTaskProgressListener) else progress_total_duration,
214
+ sub_task_start=idx*(1/len(merged)),
215
+ sub_task_total=1/len(merged))
216
  segment_result = whisperCallable.invoke(segment_audio, segment_index, segment_prompt, detected_language, progress_listener=scaled_progress_listener)
217
 
218
  perf_end_time = time.perf_counter()
src/vadParallel.py CHANGED
@@ -18,7 +18,7 @@ class _ProgressListenerToQueue(ProgressListener):
18
  self.progress_total = 0
19
  self.prev_progress = 0
20
 
21
- def on_progress(self, current: Union[int, float], total: Union[int, float]):
22
  delta = current - self.prev_progress
23
  self.prev_progress = current
24
  self.progress_total = total
@@ -178,7 +178,7 @@ class ParallelTranscription(AbstractTranscription):
178
 
179
  total_progress += delta
180
  if progress_listener is not None:
181
- progress_listener.on_progress(total_progress, total_duration)
182
 
183
  results = results_async.get()
184
 
 
18
  self.progress_total = 0
19
  self.prev_progress = 0
20
 
21
+ def on_progress(self, current: Union[int, float], total: Union[int, float], desc: str = None):
22
  delta = current - self.prev_progress
23
  self.prev_progress = current
24
  self.progress_total = total
 
178
 
179
  total_progress += delta
180
  if progress_listener is not None:
181
+ progress_listener.on_progress(total_progress, total_duration, desc="Transcribe parallel")
182
 
183
  results = results_async.get()
184
 
src/whisper/fasterWhisperContainer.py CHANGED
@@ -150,7 +150,7 @@ class FasterWhisperCallback(AbstractWhisperCallback):
150
  segments.append(segment)
151
 
152
  if progress_listener is not None:
153
- progress_listener.on_progress(segment.end, info.duration)
154
  if verbose:
155
  print("[{}->{}] {}".format(format_timestamp(segment.start, True), format_timestamp(segment.end, True),
156
  segment.text))
 
150
  segments.append(segment)
151
 
152
  if progress_listener is not None:
153
+ progress_listener.on_progress(segment.end, info.duration, "Transcribe")
154
  if verbose:
155
  print("[{}->{}] {}".format(format_timestamp(segment.start, True), format_timestamp(segment.end, True),
156
  segment.text))