Spaces:

aadnk
/

faster-whisper-webui

Running

aadnk committed on Feb 7

Commit

60c92fc

•

2 Parent(s): 8063dc1 99ae9fb

Merge branch 'main' of https://huggingface.co/spaces/aadnk/whisper-webui

Files changed (5) hide show

app.py CHANGED Viewed

@@ -501,16 +501,20 @@ class WhisperTranscriber:
         language = result["language"] if "language" in result else None
         languageMaxLineWidth = self.__get_max_line_width(language)
         print("Max line width " + str(languageMaxLineWidth))
         vtt = self.__get_subs(result["segments"], "vtt", languageMaxLineWidth, highlight_words=highlight_words)
         srt = self.__get_subs(result["segments"], "srt", languageMaxLineWidth, highlight_words=highlight_words)
-        json_result = json.dumps(result, indent=4, ensure_ascii=False)
         output_files = []
         output_files.append(self.__create_file(srt, output_dir, source_name + "-subs.srt"));
         output_files.append(self.__create_file(vtt, output_dir, source_name + "-subs.vtt"));
         output_files.append(self.__create_file(text, output_dir, source_name + "-transcript.txt"));
-        output_files.append(self.__create_file(json_result, output_dir, source_name + "-result.json"));
         return output_files, text, vtt

         language = result["language"] if "language" in result else None
         languageMaxLineWidth = self.__get_max_line_width(language)
+        # We always create the JSON file for debugging purposes
+        json_result = json.dumps(result, indent=4, ensure_ascii=False)
+        json_file = self.__create_file(json_result, output_dir, source_name + "-result.json")
+        print("Created JSON file " + json_file)
         print("Max line width " + str(languageMaxLineWidth))
         vtt = self.__get_subs(result["segments"], "vtt", languageMaxLineWidth, highlight_words=highlight_words)
         srt = self.__get_subs(result["segments"], "srt", languageMaxLineWidth, highlight_words=highlight_words)
         output_files = []
         output_files.append(self.__create_file(srt, output_dir, source_name + "-subs.srt"));
         output_files.append(self.__create_file(vtt, output_dir, source_name + "-subs.vtt"));
         output_files.append(self.__create_file(text, output_dir, source_name + "-transcript.txt"));
+        output_files.append(json_file)
         return output_files, text, vtt

requirements-fasterWhisper.txt CHANGED Viewed

@@ -2,6 +2,7 @@ ctranslate2
 faster-whisper
 ffmpeg-python==0.2.0
 gradio==3.38.0
 yt-dlp
 json5
 torch

 faster-whisper
 ffmpeg-python==0.2.0
 gradio==3.38.0
+gradio-client==0.8.1
 yt-dlp
 json5
 torch

requirements-whisper.txt CHANGED Viewed

@@ -3,6 +3,7 @@ git+https://github.com/openai/whisper.git
 transformers
 ffmpeg-python==0.2.0
 gradio==3.38.0
 yt-dlp
 torchaudio
 altair

 transformers
 ffmpeg-python==0.2.0
 gradio==3.38.0
+gradio-client==0.8.1
 yt-dlp
 torchaudio
 altair

requirements.txt CHANGED Viewed

@@ -2,6 +2,7 @@ ctranslate2
 faster-whisper
 ffmpeg-python==0.2.0
 gradio==3.38.0
 yt-dlp
 json5
 torch

 faster-whisper
 ffmpeg-python==0.2.0
 gradio==3.38.0
+gradio-client==0.8.1
 yt-dlp
 json5
 torch

src/utils.py CHANGED Viewed

@@ -63,14 +63,18 @@ def write_vtt(transcript: Iterator[dict], file: TextIO,
     print("WEBVTT\n", file=file)
     for segment in iterator:
-        text = segment['text'].replace('-->', '->')
-        print(
-            f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n"
-            f"{text}\n",
-            file=file,
-            flush=True,
-        )
 def write_srt(transcript: Iterator[dict], file: TextIO,
               maxLineWidth=None, highlight_words: bool = False):

     print("WEBVTT\n", file=file)
     for segment in iterator:
+        try:
+            text = segment['text'].replace('-->', '->')
+            print(
+                f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n"
+                f"{text}\n",
+                file=file,
+                flush=True,
+            )
+        except Exception as e:
+            print(f"Error writing segment {segment}: {e}")
+            raise
 def write_srt(transcript: Iterator[dict], file: TextIO,
               maxLineWidth=None, highlight_words: bool = False):