# Gradio demo: transcribe or translate speech with OpenAI Whisper, and burn
# English subtitles into an uploaded video using ffmpeg.
import os
import subprocess

import gradio as gr
import whisper
from whisper.utils import write_vtt  # note: removed in later openai-whisper releases (replaced by WriteVTT)

# Load the small multilingual Whisper model once at startup.
model = whisper.load_model("small")

def video2mp3(video_file, output_ext="mp3"):
    """Extract the audio track from a video with ffmpeg and return the new file's path."""
    filename, _ = os.path.splitext(video_file)
    audio_file = f"{filename}.{output_ext}"
    subprocess.call(
        ["ffmpeg", "-y", "-i", video_file, audio_file],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.STDOUT,
    )
    return audio_file

def transcribe_audio(file):
    """Transcribe recorded audio in its original language."""
    options = dict(task="transcribe", best_of=5, fp16=False)
    text = model.transcribe(file, **options)["text"]
    return text.strip()

def translate_audio(file):
    """Translate recorded audio into English."""
    options = dict(task="translate", best_of=5, fp16=False)
    text = model.transcribe(file, **options)["text"]
    return text.strip()

def translate_video(input_video):
    """Translate a video's speech into English and burn the subtitles into a new video."""
    audio_file = video2mp3(input_video)

    # Translate the extracted audio (Whisper's translate task targets English).
    options = dict(beam_size=5, best_of=5, fp16=False)
    translate_options = dict(task="translate", **options)
    result = model.transcribe(audio_file, **translate_options)

    # Write the segments as a WebVTT subtitle file next to the audio file.
    base_path = os.path.splitext(audio_file)[0]
    subtitle = base_path + ".vtt"
    with open(subtitle, "w") as vtt:
        write_vtt(result["segments"], file=vtt)

    # Burn the subtitles into the video with ffmpeg.
    output_video = base_path + "_subtitled.mp4"
    os.system(f"ffmpeg -y -i {input_video} -vf subtitles={subtitle} {output_video}")

    return output_video

block = gr.Blocks()

with block:

    # Video subtitling: upload a video, translate its speech to English, and
    # return a copy with burned-in subtitles.
    with gr.Group():
        with gr.Box():
            with gr.Row():
                inp_video = gr.Video(
                    label="Input Video",
                    type="filepath",
                    mirror_webcam=False,
                )
                op_video = gr.Video()
            btn = gr.Button("Generate Subtitle Video")
        btn.click(translate_video, inputs=[inp_video], outputs=[op_video])
    # Microphone demo: record audio, then transcribe it in place or translate
    # it to English.
    with gr.Group():
        audio = gr.Audio(
            show_label=False,
            source="microphone",
            type="filepath",
        )
        with gr.Box():
            with gr.Row().style(equal_height=True):
                transcribe_button = gr.Button("Transcribe")
                translate_button = gr.Button("Translate")
        textbox = gr.Textbox(show_label=False)
        transcribe_button.click(transcribe_audio, inputs=[audio], outputs=[textbox])
        translate_button.click(translate_audio, inputs=[audio], outputs=[textbox])

block.launch(debug=True)