Spaces: Sleeping
killian31 committed · Commit 5fd1d62
0 Parent(s)
feat: working app and install scripts
Browse files
- README.md +26 -0
- app.py +66 -0
- install_linux.sh +4 -0
- install_macos.sh +4 -0
- requirements.txt +5 -0
README.md
ADDED
@@ -0,0 +1,26 @@
+# Speech to Video Subtitles
+
+Get your synchronized subtitled video in seconds!
+
+## Installation
+
+In your terminal, run
+
+```bash
+git clone https://github.com/killian31/AudioVisualTranscription
+cd AudioVisualTranscription
+pip install -r requirements.txt
+```
+
+The app needs ImageMagick and ffmpeg to run. To install them, run
+
+- macOS: `bash ./install_macos.sh`
+- Debian/Ubuntu: `chmod +x install_linux.sh; ./install_linux.sh`
+
+## Usage
+
+Launch the Gradio app with
+
+```bash
+python3 app.py
+```
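The install scripts above assume `ffmpeg` and ImageMagick end up on `PATH`, which is where MoviePy looks for them. A minimal pre-flight check, sketched in Python with only the standard library (`magick` is the ImageMagick 7 entry point, `convert` the version 6 one; which name applies depends on what the script installed):

```python
import shutil

# Sketch of a pre-flight check: MoviePy shells out to these tools,
# so it helps to fail early with a readable message if one is missing.
for tool in ("ffmpeg", "magick", "convert"):
    found = shutil.which(tool)
    print(f"{tool}: {found if found else 'NOT on PATH'}")
```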
app.py
ADDED
@@ -0,0 +1,66 @@
+import gradio as gr
+import numpy as np
+import torch
+import whisper
+from moviepy.editor import *
+from moviepy.video.VideoClip import TextClip
+
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+
+model = whisper.load_model("base", device=DEVICE)
+
+
+def generate_video(audio_path, language):
+    # Transcribe the audio file with Whisper in the selected language
+    result = model.transcribe(audio_path, language=language)
+
+    # Prepare video clips from transcription segments
+    clips = []
+    for segment in result["segments"]:
+        text_clip = (
+            TextClip(
+                segment["text"],
+                fontsize=24,
+                font="Arial",
+                color="white",
+                bg_color="black",
+                size=(1280, 720),
+            )
+            .set_duration(segment["end"] - segment["start"])
+            .set_start(segment["start"])
+        )
+        clips.append(text_clip)
+
+    # Concatenate clips and set audio
+    video = concatenate_videoclips(clips, method="compose")
+    video = video.set_audio(AudioFileClip(audio_path))
+
+    # Export the subtitled video to a file on disk
+    output_path = "./transcribed_video.mp4"
+    video.write_videofile(output_path, fps=6, codec="libx264", audio_codec="aac")
+
+    return output_path
+
+
+if __name__ == "__main__":
+
+    print(
+        f"Model is {'multilingual' if model.is_multilingual else 'English-only'} "
+        f"and has {sum(np.prod(p.shape) for p in model.parameters()):,} parameters."
+    )
+    # Gradio interface
+    iface = gr.Interface(
+        fn=generate_video,
+        inputs=[
+            gr.Audio(sources=["upload", "microphone"], type="filepath"),
+            gr.Dropdown(
+                ["en", "es", "fr", "de", "it", "nl", "ru", "zh"],
+                label="Language",
+            ),
+        ],
+        outputs=gr.Video(label="Play Video", show_download_button=True),
+        title="Audio Transcription Video Generator",
+        description="Upload your audio file and select the language for transcription.",
+    )
+
+    iface.launch()
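Since `generate_video` takes a plain audio file path and returns the output path, it can be exercised without launching the Gradio UI at all. A hypothetical smoke test (`sample.wav` is an assumed local file, and importing `app` loads the Whisper base model, downloading it on first run):

```python
# Hypothetical smoke test: call the handler in app.py directly,
# bypassing the Gradio interface. sample.wav is an assumed local file.
from app import generate_video

output = generate_video("sample.wav", language="en")
print(f"Subtitled video written to: {output}")
```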
install_linux.sh
ADDED
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+sudo apt update
+sudo apt install -y imagemagick ffmpeg
install_macos.sh
ADDED
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+brew install imagemagick
+brew install ffmpeg
requirements.txt
ADDED
@@ -0,0 +1,5 @@
+gradio
+moviepy
+numpy
+openai_whisper
+torch
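One caveat on the unpinned `moviepy` entry: `app.py` imports `moviepy.editor` and calls the `set_duration`/`set_start`/`set_audio` clip methods, which belong to the MoviePy 1.x API (2.x removed `moviepy.editor` and renamed those methods to `with_*`). A defensive import sketch for catching the mismatch at startup, assuming the 1.x series is intended:

```python
# Guard for the MoviePy 1.x API that app.py relies on; moviepy.editor
# and the set_* clip methods were removed in MoviePy 2.x.
try:
    from moviepy.editor import TextClip  # noqa: F401  (1.x only)
except ImportError as exc:
    raise SystemExit(
        "Incompatible MoviePy detected; install the 1.x series, "
        "e.g. pip install 'moviepy<2.0'"
    ) from exc
```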