aadnk committed on
Commit
60c92fc
2 Parent(s): 8063dc1 99ae9fb

Merge branch 'main' of https://huggingface.co/spaces/aadnk/whisper-webui

Browse files
app.py CHANGED
@@ -501,16 +501,20 @@ class WhisperTranscriber:
501
  language = result["language"] if "language" in result else None
502
  languageMaxLineWidth = self.__get_max_line_width(language)
503
 
 
 
 
 
 
504
  print("Max line width " + str(languageMaxLineWidth))
505
  vtt = self.__get_subs(result["segments"], "vtt", languageMaxLineWidth, highlight_words=highlight_words)
506
  srt = self.__get_subs(result["segments"], "srt", languageMaxLineWidth, highlight_words=highlight_words)
507
- json_result = json.dumps(result, indent=4, ensure_ascii=False)
508
 
509
  output_files = []
510
  output_files.append(self.__create_file(srt, output_dir, source_name + "-subs.srt"));
511
  output_files.append(self.__create_file(vtt, output_dir, source_name + "-subs.vtt"));
512
  output_files.append(self.__create_file(text, output_dir, source_name + "-transcript.txt"));
513
- output_files.append(self.__create_file(json_result, output_dir, source_name + "-result.json"));
514
 
515
  return output_files, text, vtt
516
 
 
501
  language = result["language"] if "language" in result else None
502
  languageMaxLineWidth = self.__get_max_line_width(language)
503
 
504
+ # We always create the JSON file for debugging purposes
505
+ json_result = json.dumps(result, indent=4, ensure_ascii=False)
506
+ json_file = self.__create_file(json_result, output_dir, source_name + "-result.json")
507
+ print("Created JSON file " + json_file)
508
+
509
  print("Max line width " + str(languageMaxLineWidth))
510
  vtt = self.__get_subs(result["segments"], "vtt", languageMaxLineWidth, highlight_words=highlight_words)
511
  srt = self.__get_subs(result["segments"], "srt", languageMaxLineWidth, highlight_words=highlight_words)
 
512
 
513
  output_files = []
514
  output_files.append(self.__create_file(srt, output_dir, source_name + "-subs.srt"));
515
  output_files.append(self.__create_file(vtt, output_dir, source_name + "-subs.vtt"));
516
  output_files.append(self.__create_file(text, output_dir, source_name + "-transcript.txt"));
517
+ output_files.append(json_file)
518
 
519
  return output_files, text, vtt
520
 
requirements-fasterWhisper.txt CHANGED
@@ -2,6 +2,7 @@ ctranslate2
2
  faster-whisper
3
  ffmpeg-python==0.2.0
4
  gradio==3.38.0
 
5
  yt-dlp
6
  json5
7
  torch
 
2
  faster-whisper
3
  ffmpeg-python==0.2.0
4
  gradio==3.38.0
5
+ gradio-client==0.8.1
6
  yt-dlp
7
  json5
8
  torch
requirements-whisper.txt CHANGED
@@ -3,6 +3,7 @@ git+https://github.com/openai/whisper.git
3
  transformers
4
  ffmpeg-python==0.2.0
5
  gradio==3.38.0
 
6
  yt-dlp
7
  torchaudio
8
  altair
 
3
  transformers
4
  ffmpeg-python==0.2.0
5
  gradio==3.38.0
6
+ gradio-client==0.8.1
7
  yt-dlp
8
  torchaudio
9
  altair
requirements.txt CHANGED
@@ -2,6 +2,7 @@ ctranslate2
2
  faster-whisper
3
  ffmpeg-python==0.2.0
4
  gradio==3.38.0
 
5
  yt-dlp
6
  json5
7
  torch
 
2
  faster-whisper
3
  ffmpeg-python==0.2.0
4
  gradio==3.38.0
5
+ gradio-client==0.8.1
6
  yt-dlp
7
  json5
8
  torch
src/utils.py CHANGED
@@ -63,14 +63,18 @@ def write_vtt(transcript: Iterator[dict], file: TextIO,
63
  print("WEBVTT\n", file=file)
64
 
65
  for segment in iterator:
66
- text = segment['text'].replace('-->', '->')
67
-
68
- print(
69
- f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n"
70
- f"{text}\n",
71
- file=file,
72
- flush=True,
73
- )
 
 
 
 
74
 
75
  def write_srt(transcript: Iterator[dict], file: TextIO,
76
  maxLineWidth=None, highlight_words: bool = False):
 
63
  print("WEBVTT\n", file=file)
64
 
65
  for segment in iterator:
66
+ try:
67
+ text = segment['text'].replace('-->', '->')
68
+
69
+ print(
70
+ f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n"
71
+ f"{text}\n",
72
+ file=file,
73
+ flush=True,
74
+ )
75
+ except Exception as e:
76
+ print(f"Error writing segment {segment}: {e}")
77
+ raise
78
 
79
  def write_srt(transcript: Iterator[dict], file: TextIO,
80
  maxLineWidth=None, highlight_words: bool = False):