avans06 committed on
Commit
852f4fa
1 Parent(s): 9428712

When the language is set to Chinese,

Browse files

the conversion from Simplified Chinese to Traditional Chinese is now performed by zhconv on the generated output files, instead of by prepending an initial prompt.

When the "--merge_subtitle_with_sources" argument is enabled,
the subtitle language code (such as .en, .zh, .jp, etc.) is inserted before the extension of the merged video file, replacing the previous ".srt" marker.

The video format selector used for downloaded YouTube videos now includes the filter [vcodec^=avc1], i.e. a video codec whose name starts with avc1.

app.py CHANGED
@@ -19,6 +19,7 @@ from src.hooks.subTaskProgressListener import SubTaskProgressListener
19
  from src.hooks.whisperProgressHook import create_progress_listener_handle
20
  from src.languages import _TO_LANGUAGE_CODE
21
  from src.languages import get_language_names
 
22
  from src.modelCache import ModelCache
23
  from src.prompts.jsonPromptStrategy import JsonPromptStrategy
24
  from src.prompts.prependPromptStrategy import PrependPromptStrategy
@@ -38,6 +39,7 @@ from src.whisper.abstractWhisperContainer import AbstractWhisperContainer
38
  from src.whisper.whisperFactory import create_whisper_container
39
 
40
  import shutil
 
41
 
42
  # Configure more application defaults in config.json5
43
 
@@ -102,14 +104,11 @@ class WhisperTranscriber:
102
  vad, vadMergeWindow, vadMaxMergeSize,
103
  word_timestamps: bool = False, highlight_words: bool = False,
104
  progress=gr.Progress()):
105
- decodeOptions = dict(word_timestamps=word_timestamps)
106
- if languageName == "Chinese":
107
- decodeOptions.update(initial_prompt="繁體: ")
108
- self.app_config.vad_initial_prompt_mode = "prepend_all_segments"
109
 
110
  vadOptions = VadOptions(vad, vadMergeWindow, vadMaxMergeSize, self.app_config.vad_padding, self.app_config.vad_prompt_window, self.app_config.vad_initial_prompt_mode)
111
 
112
- return self.transcribe_webui(modelName, languageName, urlData, multipleFiles, microphoneData, task, vadOptions, highlight_words=highlight_words, progress=progress, **decodeOptions)
 
113
 
114
  # Entry function for the full tab
115
  def transcribe_webui_full(self, modelName, languageName, urlData, multipleFiles, microphoneData, task,
@@ -143,10 +142,6 @@ class WhisperTranscriber:
143
  else:
144
  temperature = [temperature]
145
 
146
- if languageName == "Chinese":
147
- initial_prompt = "繁體: " + initial_prompt
148
- self.app_config.vad_initial_prompt_mode = "prepend_all_segments"
149
-
150
  vadOptions = VadOptions(vad, vadMergeWindow, vadMaxMergeSize, vadPadding, vadPromptWindow, vadInitialPromptMode)
151
 
152
  return self.transcribe_webui(modelName, languageName, urlData, multipleFiles, microphoneData, task, vadOptions,
@@ -163,7 +158,8 @@ class WhisperTranscriber:
163
  sources = self.__get_source(urlData, multipleFiles, microphoneData)
164
 
165
  try:
166
- selectedLanguage = languageName.lower() if len(languageName) > 0 else None
 
167
  selectedModel = modelName if modelName is not None else "base"
168
 
169
  model = create_whisper_container(whisper_implementation=self.app_config.whisper_implementation,
@@ -256,6 +252,18 @@ class WhisperTranscriber:
256
  return download, text, vtt
257
 
258
  finally:
 
 
 
 
 
 
 
 
 
 
 
 
259
  # Cleanup source
260
  if self.deleteUploadedFiles:
261
  for source in sources:
@@ -266,13 +274,14 @@ class WhisperTranscriber:
266
  srt_path = source_download[0]
267
  save_path = os.path.join(self.app_config.output_dir, source.source_name)
268
  save_without_ext, ext = os.path.splitext(save_path)
269
- output_with_srt = save_without_ext + ".srt" + ext
 
270
 
271
  #ffmpeg -i "input.mp4" -i "input.srt" -c copy -c:s mov_text output.mp4
272
  input_file = ffmpeg.input(source.source_path)
273
  input_srt = ffmpeg.input(srt_path)
274
  out = ffmpeg.output(input_file, input_srt, output_with_srt, vcodec='copy', acodec='copy', scodec='mov_text')
275
- outRsult = out.run()
276
  except Exception as e:
277
  # Ignore error - it's just a cleanup
278
  print("Error merge subtitle with source file: \n" + source.source_path + ", \n" + str(e), outRsult)
 
19
  from src.hooks.whisperProgressHook import create_progress_listener_handle
20
  from src.languages import _TO_LANGUAGE_CODE
21
  from src.languages import get_language_names
22
+ from src.languages import get_language_from_name
23
  from src.modelCache import ModelCache
24
  from src.prompts.jsonPromptStrategy import JsonPromptStrategy
25
  from src.prompts.prependPromptStrategy import PrependPromptStrategy
 
39
  from src.whisper.whisperFactory import create_whisper_container
40
 
41
  import shutil
42
+ import zhconv
43
 
44
  # Configure more application defaults in config.json5
45
 
 
104
  vad, vadMergeWindow, vadMaxMergeSize,
105
  word_timestamps: bool = False, highlight_words: bool = False,
106
  progress=gr.Progress()):
 
 
 
 
107
 
108
  vadOptions = VadOptions(vad, vadMergeWindow, vadMaxMergeSize, self.app_config.vad_padding, self.app_config.vad_prompt_window, self.app_config.vad_initial_prompt_mode)
109
 
110
+ return self.transcribe_webui(modelName, languageName, urlData, multipleFiles, microphoneData, task, vadOptions,
111
+ word_timestamps=word_timestamps, highlight_words=highlight_words, progress=progress)
112
 
113
  # Entry function for the full tab
114
  def transcribe_webui_full(self, modelName, languageName, urlData, multipleFiles, microphoneData, task,
 
142
  else:
143
  temperature = [temperature]
144
 
 
 
 
 
145
  vadOptions = VadOptions(vad, vadMergeWindow, vadMaxMergeSize, vadPadding, vadPromptWindow, vadInitialPromptMode)
146
 
147
  return self.transcribe_webui(modelName, languageName, urlData, multipleFiles, microphoneData, task, vadOptions,
 
158
  sources = self.__get_source(urlData, multipleFiles, microphoneData)
159
 
160
  try:
161
+ langObj = get_language_from_name(languageName)
162
+ selectedLanguage = languageName.lower() if languageName is not None and len(languageName) > 0 else None
163
  selectedModel = modelName if modelName is not None else "base"
164
 
165
  model = create_whisper_container(whisper_implementation=self.app_config.whisper_implementation,
 
252
  return download, text, vtt
253
 
254
  finally:
255
+ if languageName == "Chinese":
256
+ for file_path in source_download:
257
+ try:
258
+ with open(file_path, "r+", encoding="utf-8") as source:
259
+ content = source.read()
260
+ content = zhconv.convert(content, "zh-tw")
261
+ source.seek(0)
262
+ source.write(content)
263
+ except Exception as e:
264
+ # Ignore error - it's just a cleanup
265
+ print("Error converting Traditional Chinese with download source file: \n" + file_path + ", \n" + str(e))
266
+
267
  # Cleanup source
268
  if self.deleteUploadedFiles:
269
  for source in sources:
 
274
  srt_path = source_download[0]
275
  save_path = os.path.join(self.app_config.output_dir, source.source_name)
276
  save_without_ext, ext = os.path.splitext(save_path)
277
+ lang_ext = "." + langObj.code if langObj is not None else ""
278
+ output_with_srt = save_without_ext + lang_ext + ext
279
 
280
  #ffmpeg -i "input.mp4" -i "input.srt" -c copy -c:s mov_text output.mp4
281
  input_file = ffmpeg.input(source.source_path)
282
  input_srt = ffmpeg.input(srt_path)
283
  out = ffmpeg.output(input_file, input_srt, output_with_srt, vcodec='copy', acodec='copy', scodec='mov_text')
284
+ outRsult = out.run(overwrite_output=True)
285
  except Exception as e:
286
  # Ignore error - it's just a cleanup
287
  print("Error merge subtitle with source file: \n" + source.source_path + ", \n" + str(e), outRsult)
requirements-fasterWhisper.txt CHANGED
@@ -6,4 +6,5 @@ yt-dlp
6
  json5
7
  torch
8
  torchaudio
9
- more_itertools
 
 
6
  json5
7
  torch
8
  torchaudio
9
+ more_itertools
10
+ zhconv
requirements-whisper.txt CHANGED
@@ -6,4 +6,5 @@ gradio==3.36.0
6
  yt-dlp
7
  torchaudio
8
  altair
9
- json5
 
 
6
  yt-dlp
7
  torchaudio
8
  altair
9
+ json5
10
+ zhconv
requirements.txt CHANGED
@@ -6,4 +6,5 @@ yt-dlp
6
  json5
7
  torch
8
  torchaudio
9
- more_itertools
 
 
6
  json5
7
  torch
8
  torchaudio
9
+ more_itertools
10
+ zhconv
src/download.py CHANGED
@@ -29,7 +29,7 @@ def _perform_download(url: str, maxDuration: int = None, outputTemplate: str = N
29
  destinationDirectory = mkdtemp()
30
 
31
  ydl_opts = {
32
- "format": "bestaudio/best" if onlyAudio else "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best",
33
  'paths': {
34
  'home': destinationDirectory
35
  }
 
29
  destinationDirectory = mkdtemp()
30
 
31
  ydl_opts = {
32
+ "format": "bestaudio/best" if onlyAudio else "bestvideo[ext=mp4][vcodec^=avc1]+bestaudio[ext=m4a]/best",
33
  'paths': {
34
  'home': destinationDirectory
35
  }
src/vad.py CHANGED
@@ -204,7 +204,7 @@ class AbstractTranscription(ABC):
204
  detected_language = languageCounter.most_common(1)[0][0] if len(languageCounter) > 0 else None
205
 
206
  print("Running whisper from ", format_timestamp(segment_start), " to ", format_timestamp(segment_end), ", duration: ",
207
- segment_duration, "expanded: ", segment_expand_amount, "prompt: ", segment_prompt, "language: ", detected_language)
208
 
209
  perf_start_time = time.perf_counter()
210
 
 
204
  detected_language = languageCounter.most_common(1)[0][0] if len(languageCounter) > 0 else None
205
 
206
  print("Running whisper from ", format_timestamp(segment_start), " to ", format_timestamp(segment_end), ", duration: ",
207
+ segment_duration, "expanded: ", segment_expand_amount, ", prompt: ", segment_prompt, ", detected language: ", detected_language)
208
 
209
  perf_start_time = time.perf_counter()
210