killian31 commited on
Commit
5fd1d62
·
0 Parent(s):

feat: working app and install scripts

Browse files
Files changed (5) hide show
  1. README.md +26 -0
  2. app.py +66 -0
  3. install_linux.sh +4 -0
  4. install_macos.sh +4 -0
  5. requirements.txt +5 -0
README.md ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Speech to Video Subtitles
2
+
3
+ Get your synchronized subtitled video in seconds!
4
+
5
+ ## Installation
6
+
7
+ In your terminal, run
8
+
9
+ ```bash
10
+ git clone https://github.com/killian31/AudioVisualTranscription
11
+ cd AudioVisualTranscription
12
+ pip install -r requirements.txt
13
+ ```
14
+
15
+ The app needs ImageMagick and ffmpeg to run. To install them, run
16
+
17
+ - macOS: `bash ./install_macos.sh`
18
+ - Debian/Ubuntu: `chmod +x install_linux.sh; ./install_linux.sh`
19
+
20
+ ## Usage
21
+
22
+ Launch the Gradio app with
23
+
24
+ ```bash
25
+ python3 app.py
26
+ ```
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import numpy as np
import torch
import whisper
# Explicit imports instead of the original `from moviepy.editor import *`:
# star-imports hide where names come from and pollute the module namespace.
from moviepy.editor import AudioFileClip, CompositeVideoClip, concatenate_videoclips
from moviepy.video.VideoClip import TextClip

# Run Whisper on GPU when available; CPU inference works but is slower.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load the multilingual "base" checkpoint once at import time so every
# request reuses the same model instance.
model = whisper.load_model("base", device=DEVICE)
11
+
12
+
13
def generate_video(audio_path, language):
    """Transcribe an audio file and render a subtitle video synced to it.

    Args:
        audio_path: Filesystem path to the uploaded audio file.
        language: Language code forwarded to Whisper (e.g. "en", "fr").

    Returns:
        Path to the rendered MP4 file.

    Raises:
        ValueError: If Whisper produces no transcription segments.
    """
    # Transcribe with the globally loaded Whisper model.
    result = model.transcribe(audio_path, language=language)

    segments = result["segments"]
    if not segments:
        # Without this guard an empty segment list would crash deep inside
        # moviepy with an unhelpful error.
        raise ValueError("No speech could be transcribed from the audio.")

    # One full-frame text clip per transcription segment, positioned on the
    # timeline at the segment's own start time.
    clips = []
    for segment in segments:
        text_clip = (
            TextClip(
                segment["text"],
                fontsize=24,
                font="Arial",
                color="white",
                bg_color="black",
                size=(1280, 720),
            )
            .set_duration(segment["end"] - segment["start"])
            .set_start(segment["start"])
        )
        clips.append(text_clip)

    # BUG FIX: the original used concatenate_videoclips(), which plays clips
    # back-to-back and ignores their start times — any silence between
    # segments desynchronized the subtitles from the audio. Compositing
    # honors each clip's set_start(), keeping subtitles aligned.
    audio = AudioFileClip(audio_path)
    video = CompositeVideoClip(clips, size=(1280, 720))
    # Span the whole audio track, including trailing silence.
    video = video.set_duration(audio.duration).set_audio(audio)

    output_path = "./transcribed_video.mp4"
    # Low fps is fine: frames are static text. Audio is encoded as AAC.
    video.write_videofile(output_path, fps=6, codec="libx264", audio_codec="aac")

    return output_path
43
+
44
+
45
if __name__ == "__main__":
    # Report the loaded model's capabilities before starting the UI.
    model_kind = "multilingual" if model.is_multilingual else "English-only"
    n_params = sum(np.prod(p.shape) for p in model.parameters())
    print(f"Model is {model_kind} and has {n_params:,} parameters.")

    # Build the Gradio widgets up front, then wire them into the interface.
    audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath")
    language_input = gr.Dropdown(
        ["en", "es", "fr", "de", "it", "nl", "ru", "zh"],
        label="Language",
    )
    video_output = gr.Video(label="Play Video", show_download_button=True)

    demo = gr.Interface(
        fn=generate_video,
        inputs=[audio_input, language_input],
        outputs=video_output,
        title="Audio Transcription Video Generator",
        description="Upload your audio file and select the language for transcription.",
    )

    demo.launch()
install_linux.sh ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
#!/bin/bash
# Install the system dependencies (ImageMagick, ffmpeg) on Debian/Ubuntu.
# Abort on the first failing command so a failed `apt update` is not
# silently ignored before the install step runs.
set -e

sudo apt update
sudo apt install -y imagemagick ffmpeg
install_macos.sh ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
#!/bin/bash
# Install the system dependencies (ImageMagick, ffmpeg) via Homebrew.
# Abort on the first failing command: without this, a failed first install
# is ignored and the script's exit status reflects only the last command.
set -e

brew install imagemagick
brew install ffmpeg
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ moviepy
3
+ numpy
4
+ openai_whisper
5
+ torch