PhenixNova committed on
Commit
cec1f0b
1 Parent(s): f881b76

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -0
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import subprocess
4
+ import whisper
5
+ from whisper.utils import write_vtt
6
+
7
# Load the Whisper "medium" model once, as a module-level statement; the
# transcription/translation functions below all call `model.transcribe`.
+ model = whisper.load_model("medium")
8
+
9
def video2mp3(video_file, output_ext="mp3"):
    """Convert *video_file* to an audio file with ffmpeg.

    The output is written next to the input: same path, with the input's
    extension replaced by *output_ext*.

    Args:
        video_file: Path of the media file to convert.
        output_ext: Extension (without the leading dot) of the file to create.

    Returns:
        Path of the converted file. When the input already carries the
        requested extension the input path is returned unchanged.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    base_name, _ = os.path.splitext(video_file)
    output_file = f"{base_name}.{output_ext}"

    if output_file == video_file:
        # ffmpeg cannot read from and write to the same file; the input is
        # already in the requested container, so there is nothing to do.
        return output_file

    # check=True surfaces a failed conversion here instead of returning the
    # path of a file that was never created.
    subprocess.run(
        ["ffmpeg", "-y", "-i", video_file, output_file],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.STDOUT,
        check=True,
    )
    return output_file
15
+
16
def transcribe_audio(file):
    """Transcribe the speech in an audio file with the shared Whisper model.

    Args:
        file: Path of the audio file. May be ``None`` or empty when the
            caller has no recording to offer.

    Returns:
        The recognised text with surrounding whitespace removed, or an empty
        string when no file was supplied.
    """
    if not file:
        # Nothing to transcribe; avoid handing None/"" to Whisper.
        return ""

    # fp16=False asks Whisper to decode without half precision.
    options = dict(task="transcribe", best_of=5, fp16=False)
    result = model.transcribe(file, **options)
    return result["text"].strip()
20
+
21
def translate_audio(file):
    """Run Whisper's ``translate`` task on an audio file.

    Args:
        file: Path of the audio file. May be ``None`` or empty when the
            caller has no recording to offer.

    Returns:
        The translated text with surrounding whitespace removed, or an empty
        string when no file was supplied.
    """
    if not file:
        # Nothing to translate; avoid handing None/"" to Whisper.
        return ""

    # fp16=False asks Whisper to decode without half precision.
    options = dict(task="translate", best_of=5, fp16=False)
    result = model.transcribe(file, **options)
    return result["text"].strip()
25
+
26
def _escape_ffmpeg_filter_value(value):
    """Escape *value* for use as an option value inside an ffmpeg filtergraph."""

    def _backslash(text, specials):
        return "".join("\\" + ch if ch in specials else ch for ch in text)

    # Two passes, matching ffmpeg's two parsing levels: first the filter's own
    # option parser (\ ' :), then the filtergraph parser (\ ' [ ] , ;).
    return _backslash(_backslash(value, "\\':"), "\\'[],;")


def translate_video(input_video):
    """Burn Whisper-translated subtitles into a copy of *input_video*.

    Steps: extract the audio track, run Whisper's ``translate`` task on it,
    write the resulting segments to a ``.vtt`` file and let ffmpeg render
    that file onto the video with the ``subtitles`` filter. The ``.vtt`` and
    the subtitled video are created next to the extracted audio file.

    Args:
        input_video: Path of the video to subtitle. May be ``None`` or empty
            when the caller has no video to offer.

    Returns:
        Path of the subtitled video (``<name>_subtitled.mp4``), or ``None``
        when no input was supplied.

    Raises:
        subprocess.CalledProcessError: If the final ffmpeg run fails.
    """
    if not input_video:
        return None

    audio_file = video2mp3(input_video)

    options = dict(task="translate", beam_size=5, best_of=5, fp16=False)
    result = model.transcribe(audio_file, **options)

    # splitext only strips the final extension, so dots elsewhere in the
    # path (e.g. in directory names) no longer truncate it.
    base_path, _ = os.path.splitext(audio_file)
    subtitle = base_path + ".vtt"
    output_video = base_path + "_subtitled.mp4"

    # Write the subtitles to the exact path ffmpeg is told to read below.
    with open(subtitle, "w", encoding="utf-8") as vtt:
        write_vtt(result["segments"], file=vtt)

    # Argument list instead of a shell string: paths with spaces or shell
    # metacharacters are passed through untouched. -y overwrites a stale
    # output instead of blocking on ffmpeg's interactive prompt.
    subprocess.run(
        [
            "ffmpeg",
            "-y",
            "-i",
            input_video,
            "-vf",
            f"subtitles={_escape_ffmpeg_filter_value(subtitle)}",
            output_video,
        ],
        check=True,
    )

    return output_video
45
+
46
# Gradio UI definition and launch.
# NOTE(review): this view of the file has lost its indentation, so the exact
# nesting of the `with` blocks below cannot be determined from here -- confirm
# against the real app.py before restructuring.
+ block = gr.Blocks()
47
+
48
+ with block:
49
+
50
# Video section: an input video, an output video, and a button that runs
# translate_video on the input and shows the result in the output.
+ with gr.Group():
51
+ with gr.Box():
52
+ with gr.Row().style():
53
+ inp_video = gr.Video(
54
+ label="Input Video",
55
+ type="filepath",
56
+ mirror_webcam = False
57
+ )
58
+ op_video = gr.Video()
59
+ btn = gr.Button("Generate Subtitle Video")
60
+ btn.click(translate_video, inputs=[inp_video], outputs=[op_video])
61
# Audio section: a microphone input passed to the callbacks as a file path,
# plus two buttons that write their result into the same textbox.
+ with gr.Group():
62
+ audio = gr.Audio(
63
+ show_label=False,
64
+ source="microphone",
65
+ type="filepath"
66
+ )
67
+ with gr.Box():
68
+ with gr.Row().style(equal_height=True):
69
+ transcribe_button = gr.Button("Transcribe")
70
+ translate_button = gr.Button("Translate")
71
+ textbox = gr.Textbox(show_label=False)
72
# "Transcribe" calls transcribe_audio, "Translate" calls translate_audio;
# both take the recorded audio as input.
+ transcribe_button.click(transcribe_audio, inputs=[audio], outputs=[textbox])
73
+ translate_button.click(translate_audio, inputs=[audio], outputs=[textbox])
74
+
75
# Start the app with debug enabled.
+ block.launch(debug = True)