avans06 commited on
Commit
3ab1530
1 Parent(s): 9428712

When the language is set to Chinese,

Browse files

When the language is set to Chinese, the conversion from Simplified to Traditional Chinese is now performed using the zhconv library.

When the "--merge_subtitle_with_sources" argument is enabled,
the subtitle language code (e.g. .en, .zh, .ja) is inserted before the merged video file's extension.

Downloaded YouTube videos now use the yt-dlp format selector [vcodec^=avc1] to prefer H.264 video streams.

app.py CHANGED
@@ -19,6 +19,7 @@ from src.hooks.subTaskProgressListener import SubTaskProgressListener
19
  from src.hooks.whisperProgressHook import create_progress_listener_handle
20
  from src.languages import _TO_LANGUAGE_CODE
21
  from src.languages import get_language_names
 
22
  from src.modelCache import ModelCache
23
  from src.prompts.jsonPromptStrategy import JsonPromptStrategy
24
  from src.prompts.prependPromptStrategy import PrependPromptStrategy
@@ -38,6 +39,7 @@ from src.whisper.abstractWhisperContainer import AbstractWhisperContainer
38
  from src.whisper.whisperFactory import create_whisper_container
39
 
40
  import shutil
 
41
 
42
  # Configure more application defaults in config.json5
43
 
@@ -102,14 +104,11 @@ class WhisperTranscriber:
102
  vad, vadMergeWindow, vadMaxMergeSize,
103
  word_timestamps: bool = False, highlight_words: bool = False,
104
  progress=gr.Progress()):
105
- decodeOptions = dict(word_timestamps=word_timestamps)
106
- if languageName == "Chinese":
107
- decodeOptions.update(initial_prompt="繁體: ")
108
- self.app_config.vad_initial_prompt_mode = "prepend_all_segments"
109
 
110
  vadOptions = VadOptions(vad, vadMergeWindow, vadMaxMergeSize, self.app_config.vad_padding, self.app_config.vad_prompt_window, self.app_config.vad_initial_prompt_mode)
111
 
112
- return self.transcribe_webui(modelName, languageName, urlData, multipleFiles, microphoneData, task, vadOptions, highlight_words=highlight_words, progress=progress, **decodeOptions)
 
113
 
114
  # Entry function for the full tab
115
  def transcribe_webui_full(self, modelName, languageName, urlData, multipleFiles, microphoneData, task,
@@ -143,10 +142,6 @@ class WhisperTranscriber:
143
  else:
144
  temperature = [temperature]
145
 
146
- if languageName == "Chinese":
147
- initial_prompt = "繁體: " + initial_prompt
148
- self.app_config.vad_initial_prompt_mode = "prepend_all_segments"
149
-
150
  vadOptions = VadOptions(vad, vadMergeWindow, vadMaxMergeSize, vadPadding, vadPromptWindow, vadInitialPromptMode)
151
 
152
  return self.transcribe_webui(modelName, languageName, urlData, multipleFiles, microphoneData, task, vadOptions,
@@ -163,7 +158,8 @@ class WhisperTranscriber:
163
  sources = self.__get_source(urlData, multipleFiles, microphoneData)
164
 
165
  try:
166
- selectedLanguage = languageName.lower() if len(languageName) > 0 else None
 
167
  selectedModel = modelName if modelName is not None else "base"
168
 
169
  model = create_whisper_container(whisper_implementation=self.app_config.whisper_implementation,
@@ -266,13 +262,14 @@ class WhisperTranscriber:
266
  srt_path = source_download[0]
267
  save_path = os.path.join(self.app_config.output_dir, source.source_name)
268
  save_without_ext, ext = os.path.splitext(save_path)
269
- output_with_srt = save_without_ext + ".srt" + ext
 
270
 
271
  #ffmpeg -i "input.mp4" -i "input.srt" -c copy -c:s mov_text output.mp4
272
  input_file = ffmpeg.input(source.source_path)
273
  input_srt = ffmpeg.input(srt_path)
274
  out = ffmpeg.output(input_file, input_srt, output_with_srt, vcodec='copy', acodec='copy', scodec='mov_text')
275
- outRsult = out.run()
276
  except Exception as e:
277
  # Ignore error - it's just a cleanup
278
  print("Error merge subtitle with source file: \n" + source.source_path + ", \n" + str(e), outRsult)
@@ -439,6 +436,12 @@ class WhisperTranscriber:
439
  srt = self.__get_subs(result["segments"], "srt", languageMaxLineWidth, highlight_words=highlight_words)
440
  json_result = json.dumps(result, indent=4, ensure_ascii=False)
441
 
 
 
 
 
 
 
442
  output_files = []
443
  output_files.append(self.__create_file(srt, output_dir, source_name + "-subs.srt"));
444
  output_files.append(self.__create_file(vtt, output_dir, source_name + "-subs.vtt"));
 
19
  from src.hooks.whisperProgressHook import create_progress_listener_handle
20
  from src.languages import _TO_LANGUAGE_CODE
21
  from src.languages import get_language_names
22
+ from src.languages import get_language_from_name
23
  from src.modelCache import ModelCache
24
  from src.prompts.jsonPromptStrategy import JsonPromptStrategy
25
  from src.prompts.prependPromptStrategy import PrependPromptStrategy
 
39
  from src.whisper.whisperFactory import create_whisper_container
40
 
41
  import shutil
42
+ import zhconv
43
 
44
  # Configure more application defaults in config.json5
45
 
 
104
  vad, vadMergeWindow, vadMaxMergeSize,
105
  word_timestamps: bool = False, highlight_words: bool = False,
106
  progress=gr.Progress()):
 
 
 
 
107
 
108
  vadOptions = VadOptions(vad, vadMergeWindow, vadMaxMergeSize, self.app_config.vad_padding, self.app_config.vad_prompt_window, self.app_config.vad_initial_prompt_mode)
109
 
110
+ return self.transcribe_webui(modelName, languageName, urlData, multipleFiles, microphoneData, task, vadOptions,
111
+ word_timestamps=word_timestamps, highlight_words=highlight_words, progress=progress)
112
 
113
  # Entry function for the full tab
114
  def transcribe_webui_full(self, modelName, languageName, urlData, multipleFiles, microphoneData, task,
 
142
  else:
143
  temperature = [temperature]
144
 
 
 
 
 
145
  vadOptions = VadOptions(vad, vadMergeWindow, vadMaxMergeSize, vadPadding, vadPromptWindow, vadInitialPromptMode)
146
 
147
  return self.transcribe_webui(modelName, languageName, urlData, multipleFiles, microphoneData, task, vadOptions,
 
158
  sources = self.__get_source(urlData, multipleFiles, microphoneData)
159
 
160
  try:
161
+ langObj = get_language_from_name(languageName)
162
+ selectedLanguage = languageName.lower() if languageName is not None and len(languageName) > 0 else None
163
  selectedModel = modelName if modelName is not None else "base"
164
 
165
  model = create_whisper_container(whisper_implementation=self.app_config.whisper_implementation,
 
262
  srt_path = source_download[0]
263
  save_path = os.path.join(self.app_config.output_dir, source.source_name)
264
  save_without_ext, ext = os.path.splitext(save_path)
265
+ lang_ext = "." + langObj.code if langObj is not None else ""
266
+ output_with_srt = save_without_ext + lang_ext + ext
267
 
268
  #ffmpeg -i "input.mp4" -i "input.srt" -c copy -c:s mov_text output.mp4
269
  input_file = ffmpeg.input(source.source_path)
270
  input_srt = ffmpeg.input(srt_path)
271
  out = ffmpeg.output(input_file, input_srt, output_with_srt, vcodec='copy', acodec='copy', scodec='mov_text')
272
+ outRsult = out.run(overwrite_output=True)
273
  except Exception as e:
274
  # Ignore error - it's just a cleanup
275
  print("Error merge subtitle with source file: \n" + source.source_path + ", \n" + str(e), outRsult)
 
436
  srt = self.__get_subs(result["segments"], "srt", languageMaxLineWidth, highlight_words=highlight_words)
437
  json_result = json.dumps(result, indent=4, ensure_ascii=False)
438
 
439
+ if language == "zh":
440
+ vtt = zhconv.convert(vtt, "zh-tw")
441
+ srt = zhconv.convert(srt, "zh-tw")
442
+ text = zhconv.convert(text, "zh-tw")
443
+ json_result = zhconv.convert(json_result, "zh-tw")
444
+
445
  output_files = []
446
  output_files.append(self.__create_file(srt, output_dir, source_name + "-subs.srt"));
447
  output_files.append(self.__create_file(vtt, output_dir, source_name + "-subs.vtt"));
requirements-fasterWhisper.txt CHANGED
@@ -6,4 +6,5 @@ yt-dlp
6
  json5
7
  torch
8
  torchaudio
9
- more_itertools
 
 
6
  json5
7
  torch
8
  torchaudio
9
+ more_itertools
10
+ zhconv
requirements-whisper.txt CHANGED
@@ -6,4 +6,5 @@ gradio==3.36.0
6
  yt-dlp
7
  torchaudio
8
  altair
9
- json5
 
 
6
  yt-dlp
7
  torchaudio
8
  altair
9
+ json5
10
+ zhconv
requirements.txt CHANGED
@@ -6,4 +6,5 @@ yt-dlp
6
  json5
7
  torch
8
  torchaudio
9
- more_itertools
 
 
6
  json5
7
  torch
8
  torchaudio
9
+ more_itertools
10
+ zhconv
src/download.py CHANGED
@@ -29,7 +29,7 @@ def _perform_download(url: str, maxDuration: int = None, outputTemplate: str = N
29
  destinationDirectory = mkdtemp()
30
 
31
  ydl_opts = {
32
- "format": "bestaudio/best" if onlyAudio else "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best",
33
  'paths': {
34
  'home': destinationDirectory
35
  }
 
29
  destinationDirectory = mkdtemp()
30
 
31
  ydl_opts = {
32
+ "format": "bestaudio/best" if onlyAudio else "bestvideo[ext=mp4][vcodec^=avc1]+bestaudio[ext=m4a]/best",
33
  'paths': {
34
  'home': destinationDirectory
35
  }
src/vad.py CHANGED
@@ -204,7 +204,7 @@ class AbstractTranscription(ABC):
204
  detected_language = languageCounter.most_common(1)[0][0] if len(languageCounter) > 0 else None
205
 
206
  print("Running whisper from ", format_timestamp(segment_start), " to ", format_timestamp(segment_end), ", duration: ",
207
- segment_duration, "expanded: ", segment_expand_amount, "prompt: ", segment_prompt, "language: ", detected_language)
208
 
209
  perf_start_time = time.perf_counter()
210
 
 
204
  detected_language = languageCounter.most_common(1)[0][0] if len(languageCounter) > 0 else None
205
 
206
  print("Running whisper from ", format_timestamp(segment_start), " to ", format_timestamp(segment_end), ", duration: ",
207
+ segment_duration, "expanded: ", segment_expand_amount, ", prompt: ", segment_prompt, ", detected language: ", detected_language)
208
 
209
  perf_start_time = time.perf_counter()
210