File size: 2,651 Bytes
a9fdafe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import os
import subprocess

import gradio as gr
import moviepy.editor as mp
import torch
import whisper

# Run inference on the GPU when one is available; CPU otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"
# "tiny" is the smallest/fastest Whisper checkpoint (lowest accuracy).
model_name = "tiny"
# Loaded once at import time and shared across all Gradio requests.
whisper_model = whisper.load_model(model_name).to(device)

def _srt_timestamp(seconds):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Uses integer millisecond arithmetic (truncating, matching the original
    per-component ``int()`` truncation) to avoid repeated float modulo.
    """
    total_ms = int(seconds * 1000)
    hours, rem = divmod(total_ms, 3_600_000)
    minutes, rem = divmod(rem, 60_000)
    secs, millis = divmod(rem, 1000)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"


def generate_srt(transcription_result):
    """Build a SubRip (.srt) document from a Whisper transcription result.

    Parameters
    ----------
    transcription_result : dict
        Whisper output containing a ``'segments'`` list; each segment
        carries ``'start'``/``'end'`` offsets in seconds and the spoken
        ``'text'``.

    Returns
    -------
    str
        Full SRT text: 1-based cue numbers, ``start --> end`` timestamp
        lines, stripped segment text, blank line between cues. Empty
        string when there are no segments.
    """
    cues = []
    for index, segment in enumerate(transcription_result['segments'], start=1):
        start_ts = _srt_timestamp(segment['start'])
        end_ts = _srt_timestamp(segment['end'])
        cues.append(f"{index}\n{start_ts} --> {end_ts}\n{segment['text'].strip()}\n\n")
    # join instead of repeated += : linear, not quadratic, in total size.
    return "".join(cues)

def extract_audio_ffmpeg(video_file, audio_output):
    """Extract the audio track of *video_file* into a 16 kHz PCM WAV.

    Parameters
    ----------
    video_file : str
        Path to the input video.
    audio_output : str
        Path of the WAV file to write (overwritten if it exists).

    Raises
    ------
    subprocess.CalledProcessError
        If ffmpeg exits with a non-zero status.
    """
    # NOTE: '-y' must come BEFORE the output file; the original placed it
    # after, where ffmpeg ignores it as a trailing option and would block
    # on an interactive overwrite prompt if the WAV already existed.
    subprocess.run([
        'ffmpeg',
        '-y',                       # overwrite existing output without asking
        '-i', video_file,
        '-vn',                      # drop the video stream
        '-acodec', 'pcm_s16le',     # 16-bit little-endian PCM
        '-ar', '16000',             # resample to 16 kHz (Whisper's input rate)
        audio_output,
    ], check=True)                  # fail loudly instead of transcribing a stale file

def transcribe_and_generate_subtitles(video):
    """Transcribe a video (translating to English when needed) and burn subtitles in.

    Parameters
    ----------
    video : str
        Path to the uploaded video file.

    Returns
    -------
    tuple[str, str]
        The transcription/translation text and the path of the video with
        subtitles rendered into it.

    Raises
    ------
    subprocess.CalledProcessError
        If either ffmpeg invocation fails.
    """
    audio_path = "temp_audio.wav"
    extract_audio_ffmpeg(video, audio_path)
    try:
        # First pass WITHOUT a forced language so Whisper auto-detects it.
        # (The original passed language="en", which disabled detection and
        # made every translate branch below unreachable.)
        transcription_result = whisper_model.transcribe(audio_path, verbose=False)
        detected_language = transcription_result['language']
        # Whisper reports ISO 639-1 codes ("ha" Hausa, "yo" Yoruba) — the
        # original compared against three-letter codes ("hau"/"yor"/"ibo")
        # that Whisper never returns. NOTE(review): Igbo does not appear in
        # Whisper's supported-language list, so it cannot be detected here.
        if detected_language in ("ha", "yo"):
            transcription_result = whisper_model.transcribe(
                audio_path,
                task="translate",                # translate to English
                language=detected_language,      # skip re-detection
                verbose=False,
            )
        srt_content = generate_srt(transcription_result)
        srt_file = "output_subtitles.srt"
        with open(srt_file, "w", encoding="utf-8") as f:
            f.write(srt_content)
        output_video = "video_with_subtitles.mp4"
        # '-y' must precede the output file (trailing options are ignored
        # by ffmpeg and the run could hang on an overwrite prompt).
        subprocess.run([
            'ffmpeg',
            '-y',
            '-i', video,
            '-vf', f"subtitles={srt_file}",
            output_video,
        ], check=True)
    finally:
        # Always remove the intermediate WAV, even on failure.
        if os.path.exists(audio_path):
            os.remove(audio_path)
    return transcription_result["text"], output_video

# Gradio front-end: one video in, transcription text plus subtitled video out.
transcript_box = gr.Textbox(label="Transcription or Translation")
subtitled_video_file = gr.File(label="Download Video with Subtitles")

interface = gr.Interface(
    fn=transcribe_and_generate_subtitles,
    inputs=gr.Video(label="Upload Video File"),
    outputs=[transcript_box, subtitled_video_file],
    title="Video Subtitle Generator",
    description="Upload a video in either English, Hausa, Yoruba, or Igbo. The system will detect the language, transcribe or translate if necessary, and generate a video with subtitles embedded.",
    live=False,
)
# Start the local Gradio server (blocks until shut down).
interface.launch()