killian31 commited on
Commit
5fd1d62
·
0 Parent(s):

feat: working app and install scripts

Browse files
Files changed (5) hide show
  1. README.md +26 -0
  2. app.py +66 -0
  3. install_linux.sh +4 -0
  4. install_macos.sh +4 -0
  5. requirements.txt +5 -0
README.md ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Speech to Video Subtitles
2
+
3
+ Get your synchronized subtitled video in seconds!
4
+
5
+ ## Installation
6
+
7
+ In your terminal, run
8
+
9
+ ```bash
10
+ git clone https://github.com/killian31/AudioVisualTranscription
11
+ cd AudioVisualTranscription
12
+ pip install -r requirements.txt
13
+ ```
14
+
15
+ The app needs ImageMagick and ffmpeg to run. To install them, run
16
+
17
+ - macOS: `bash ./install_macos.sh`
18
+ - Debian/Ubuntu: `chmod +x install_linux.sh; ./install_linux.sh`
19
+
20
+ ## Usage
21
+
22
+ Launch the Gradio app with
23
+
24
+ ```bash
25
+ python3 app.py
26
+ ```
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import numpy as np
import torch
import whisper
# Explicit imports instead of the original `from moviepy.editor import *`:
# star-imports hide where names come from and pollute the module namespace.
from moviepy.editor import AudioFileClip, CompositeVideoClip, concatenate_videoclips
from moviepy.video.VideoClip import TextClip

# Run Whisper on GPU when available; CPU inference works but is slower.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load the multilingual "base" checkpoint once at import time so every
# request reuses the same model instance.
model = whisper.load_model("base", device=DEVICE)
11
+
12
+
13
def generate_video(audio_path, language):
    """Transcribe an audio file and render a subtitle video synced to it.

    Args:
        audio_path: Filesystem path to the uploaded audio file.
        language: Language code forwarded to Whisper (e.g. "en", "fr").

    Returns:
        Path to the rendered MP4 file.

    Raises:
        ValueError: If Whisper produces no transcription segments.
    """
    # Transcribe with the globally loaded Whisper model.
    result = model.transcribe(audio_path, language=language)

    segments = result["segments"]
    if not segments:
        # Without this guard an empty segment list would crash deep inside
        # moviepy with an unhelpful error.
        raise ValueError("No speech could be transcribed from the audio.")

    # One full-frame text clip per transcription segment, positioned on the
    # timeline at the segment's own start time.
    clips = []
    for segment in segments:
        text_clip = (
            TextClip(
                segment["text"],
                fontsize=24,
                font="Arial",
                color="white",
                bg_color="black",
                size=(1280, 720),
            )
            .set_duration(segment["end"] - segment["start"])
            .set_start(segment["start"])
        )
        clips.append(text_clip)

    # BUG FIX: the original used concatenate_videoclips(), which plays clips
    # back-to-back and ignores their start times — any silence between
    # segments desynchronized the subtitles from the audio. Compositing
    # honors each clip's set_start(), keeping subtitles aligned.
    audio = AudioFileClip(audio_path)
    video = CompositeVideoClip(clips, size=(1280, 720))
    # Span the whole audio track, including trailing silence.
    video = video.set_duration(audio.duration).set_audio(audio)

    output_path = "./transcribed_video.mp4"
    # Low fps is fine: frames are static text. Audio is encoded as AAC.
    video.write_videofile(output_path, fps=6, codec="libx264", audio_codec="aac")

    return output_path
43
+
44
+
45
if __name__ == "__main__":
    # Report the loaded model's capabilities before starting the UI.
    model_kind = "multilingual" if model.is_multilingual else "English-only"
    n_params = sum(np.prod(p.shape) for p in model.parameters())
    print(f"Model is {model_kind} and has {n_params:,} parameters.")

    # Build the Gradio widgets up front, then wire them into the interface.
    audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath")
    language_input = gr.Dropdown(
        ["en", "es", "fr", "de", "it", "nl", "ru", "zh"],
        label="Language",
    )
    video_output = gr.Video(label="Play Video", show_download_button=True)

    demo = gr.Interface(
        fn=generate_video,
        inputs=[audio_input, language_input],
        outputs=video_output,
        title="Audio Transcription Video Generator",
        description="Upload your audio file and select the language for transcription.",
    )

    demo.launch()
install_linux.sh ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
#!/bin/bash
# Install the system dependencies (ImageMagick, ffmpeg) on Debian/Ubuntu.
# Abort on the first failing command so a failed `apt update` is not
# silently ignored before the install step runs.
set -e

sudo apt update
sudo apt install -y imagemagick ffmpeg
install_macos.sh ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
#!/bin/bash
# Install the system dependencies (ImageMagick, ffmpeg) via Homebrew.
# Abort on the first failing command: without this, a failed first install
# is ignored and the script's exit status reflects only the last command.
set -e

brew install imagemagick
brew install ffmpeg
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ moviepy
3
+ numpy
4
+ openai_whisper
5
+ torch