Spaces:

CharlieAmalet
/

tools3ox_api_subtitles

Running on Zero

App Files Files Community

Charlie Amalet committed on Mar 18, 2024

Commit

2e802f8

verified ·

1 Parent(s): 7a0c320

Upload app.py

Browse files

Files changed (1) hide show

app.py +118 -0

app.py ADDED Viewed

	@@ -0,0 +1,118 @@

+import whisper
+from whisper.transcribe import LANGUAGES, TO_LANGUAGE_CODE
+from whisper.utils import get_writer
+import torch
+import gradio as gr
+import pathlib
+import random
+from datetime import datetime
+# Output layout: <directory of this file>/files/transcripts
+APP_DIR = pathlib.Path(__file__).parent.absolute()
+LOCAL_DIR = APP_DIR / "files"
+SAVE_DIR = LOCAL_DIR / "transcripts"
+# Parent first, then child; both calls are no-ops when the directory exists.
+LOCAL_DIR.mkdir(exist_ok=True)
+SAVE_DIR.mkdir(exist_ok=True)
+# Capitalized Whisper language names (used as the language dropdown choices).
+LANGS = [name.capitalize() for name in LANGUAGES.values()]
+# Use the GPU when torch reports one, otherwise fall back to the CPU.
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# The model is loaded once at import time and shared by every request.
+loaded_model = whisper.load_model("medium", device=DEVICE)
+# NOTE(review): not referenced anywhere else in the visible code.
+current_size = "None"
+def generate_random_filename():
+    """Return a base file name (no extension) for one transcript run.
+
+    The name has the form ``transcript_<HH_MM_SS_dd_mm_YYYY>_<suffix>``:
+    the timestamp comes from ``datetime.now()`` and the suffix is 8
+    characters drawn at random from ASCII letters and digits.
+    """
+    alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
+    stamp = datetime.now().strftime("%H_%M_%S_%d_%m_%Y")
+    suffix = ''.join(random.choices(alphabet, k=8))
+    return f"transcript_{stamp}_{suffix}"
+def get_transcript(audio_path, task_selection:str, language:str, max_line_width=0, max_line_count=0, max_words_per_line=0):
+    """Run Whisper on an audio file and return the paths of the written transcripts.
+
+    Args:
+        audio_path: Path of the audio file to process.
+        task_selection: Whisper task name; lower-cased before use (the UI
+            offers "Transcribe" and "Translate").
+        language: Language name; lower-cased and looked up in
+            ``TO_LANGUAGE_CODE``.
+        max_line_width: Forwarded to the writer when positive; 0, a negative
+            value or None leaves the option unset.
+        max_line_count: Same handling as ``max_line_width``.
+        max_words_per_line: Same handling as ``max_line_width``.
+
+    Returns:
+        A tuple of three path strings inside ``SAVE_DIR`` sharing one random
+        base name, with the extensions ``.txt``, ``.srt`` and ``.vtt``.
+
+    Raises:
+        gr.Error: If no audio file was supplied.
+        KeyError: If ``language`` is not a key of ``TO_LANGUAGE_CODE``.
+    """
+    if not audio_path:
+        # Report a readable message instead of failing deep inside Whisper.
+        raise gr.Error("Please upload an audio file before transcribing.")
+    def positive_or_none(value):
+        # None is treated like 0 so that an emptied number field cannot
+        # raise TypeError on the ``> 0`` comparison.
+        return value if value is not None and value > 0 else None
+    writer_args = {
+        "max_line_width": positive_or_none(max_line_width),
+        "max_line_count": positive_or_none(max_line_count),
+        "max_words_per_line": positive_or_none(max_words_per_line),
+    }
+    options = dict(task=task_selection.lower(), best_of=5, language=TO_LANGUAGE_CODE[language.lower()])
+    # word_timestamps=True is what the max_* line options above rely on.
+    results = loaded_model.transcribe(audio_path, verbose=True, word_timestamps=True, **options)
+    filename = generate_random_filename()
+    # The "all" writer is asked for every format; only three files are returned.
+    writer = get_writer("all", SAVE_DIR)
+    writer(results, filename, **writer_args)
+    return tuple(str(SAVE_DIR / f"{filename}.{ext}") for ext in ("txt", "srt", "vtt"))
+# input_audio = gr.Audio(label="Upload an audio file", type="file")
+# task_selection = gr.Radio(["Transcribe", "Translate"], label="Select Task")
+# output_transcript = gr.Textbox(label="Transcript")
+# block = gr.Blocks(
+#     inputs=input_audio,
+#     outputs=output_transcript,
+#     title="Auto Transcriber",
+#     description="Input an audio file and get a transcript.",
+#     update_fn=transcriber,
+#     inputs_layout="vertical",
+#     outputs_layout="vertical",
+#     input_component_labels=["Audio File", "Task"],
+#     output_component_labels=["Transcript"]
+# )
+# block.launch()
+# HTML header rendered at the top of the page.
+title="""
+<div style="text-align: center; max-width: 500px; margin: 0 auto;">
+        <div
+        style="
+            display: inline-flex;
+            align-items: center;
+            gap: 0.8rem;
+            font-size: 1.75rem;
+            margin-bottom: 10px;
+        "
+        >
+        <h1 style="font-weight: 600; margin-bottom: 7px;">
+            Auto Transcriber 🔊
+        </h1>
+        </div>
+    </div>
+"""
+with gr.Blocks() as monapp:
+    with gr.Column():
+        gr.HTML(title)
+        with gr.Row():
+            # First column: audio upload, transcription options, submit button.
+            with gr.Column():
+                audio_input = gr.Audio(
+                    label="Audiofile to transcribe",
+                    sources=["upload"],
+                    type="filepath",
+                )
+                with gr.Accordion("Transcribe options", open=True):
+                    task_selection = gr.Radio(
+                        ["Transcribe", "Translate"],
+                        value="Transcribe",
+                        label="Select a Task",
+                    )
+                    language = gr.Dropdown(
+                        choices=LANGS,
+                        value="English",
+                        label="Language spoken in the audio",
+                    )
+                    with gr.Column():
+                        options_hint = "<p>keep at 0 to <strong>don't use</strong></p>\n<p>max_words_per_line has no effect with max_line_width activated\nWord-level timestamps on translations may not be reliable.</p>"
+                        gr.HTML(options_hint)
+                        # The three number fields share the same settings:
+                        # integers starting at 0, where 0 means "not used".
+                        number_settings = dict(minimum=0, precision=0, value=0, step=1)
+                        max_line_width = gr.Number(
+                            label="Maximum number of characters in a line before breaking the line",
+                            **number_settings,
+                        )
+                        max_line_count = gr.Number(
+                            label="Maximum number of lines in a segment",
+                            **number_settings,
+                        )
+                        max_words_per_line = gr.Number(
+                            label="Maximum number of words in a segment",
+                            **number_settings,
+                        )
+                submit_btn = gr.Button("Transcribe")
+            # Second column: one file slot per returned transcript.
+            with gr.Column():
+                transcript_txt = gr.File(height=50)
+                transcript_srt = gr.File(height=50)
+                transcript_vtt = gr.File(height=50)
+    # Order matches get_transcript's parameters and its returned tuple.
+    transcript_inputs = [
+        audio_input,
+        task_selection,
+        language,
+        max_line_width,
+        max_line_count,
+        max_words_per_line,
+    ]
+    transcript_outputs = [transcript_txt, transcript_srt, transcript_vtt]
+    submit_btn.click(fn=get_transcript, inputs=transcript_inputs, outputs=transcript_outputs)
+monapp.launch(debug=True, show_error=True)
+# loaded_model = whisper.load_model("base", DEVICE)
+# loaded_model.transcribe
+# parser.add_argument("--highlight_words", type=str2bool, default=False, help="(requires --word_timestamps True) underline each word as it is spoken in srt and vtt")
+# parser.add_argument("--max_line_width", type=optional_int, default=None, help="(requires --word_timestamps True) the maximum number of characters in a line before breaking the line")
+# parser.add_argument("--max_line_count", type=optional_int, default=None, help="(requires --word_timestamps True) the maximum number of lines in a segment")
+# parser.add_argument("--max_words_per_line", type=optional_int, default=None, help="(requires --word_timestamps True, no effect with --max_line_width) the maximum number of words in a segment")