mlkorra committed on
Commit
f7ece39
·
1 Parent(s): 34ae93e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -57
app.py CHANGED
@@ -1,65 +1,92 @@
1
- import gradio as gr
2
- import os
3
- import sys
4
- import subprocess
5
- #from moviepy.editor import VideoFileClip
6
-
7
  import whisper
8
  from whisper.utils import write_vtt
9
 
10
- model = whisper.load_model("medium")
11
-
12
- title = "Add Text/Caption to your YouTube Shorts - MultiLingual"
13
-
14
- def video2mp3(video_file, output_ext="mp3"):
15
- filename, ext = os.path.splitext(video_file)
16
- subprocess.call(["ffmpeg", "-y", "-i", video_file, f"{filename}.{output_ext}"],
17
- stdout=subprocess.DEVNULL,
18
- stderr=subprocess.STDOUT)
19
- return f"{filename}.{output_ext}"
20
-
21
-
22
- def translate(input_video):
23
-
24
- audio_file = video2mp3(input_video)
25
-
26
- options = dict(beam_size=5, best_of=5, fp16 = False)
27
- translate_options = dict(task="translate", **options)
28
- result = model.transcribe(audio_file,**translate_options)
29
-
30
- output_dir = ''
31
- audio_path = audio_file.split(".")[0]
32
-
33
- with open(os.path.join(output_dir, audio_path + ".vtt"), "w") as vtt:
34
- write_vtt(result["segments"], file=vtt)
35
-
36
- subtitle = audio_path + ".vtt"
37
- output_video = audio_path + "_subtitled.mp4"
38
-
39
- os.system(f"ffmpeg -i {input_video} -vf subtitles={subtitle} {output_video}")
40
-
41
- return output_video
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  block = gr.Blocks()
44
- with block:
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  with gr.Group():
47
  with gr.Box():
48
- with gr.Row().style():
49
- inp_video = gr.Video(
50
- label="Input Video",
51
- type="filepath",
52
- mirror_webcam = False
53
- )
54
- op_video = gr.Video()
55
- btn = gr.Button("Generate Subtitle Video")
56
-
57
-
58
-
59
-
60
-
61
-
62
- btn.click(translate, inputs=[inp_video], outputs=[op_video])
63
-
64
-
65
- block.launch(enable_queue = True)
 
 
 
 
 
 
 
1
+ import gradio as gr
 
 
 
 
 
2
  import whisper
3
  from whisper.utils import write_vtt
4
 
5
+ from pytube import YouTube
6
+ import os
7
+ import sys
8
+ import subprocess
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
+ # Default Whisper model loaded at startup; `change_model` swaps these
+ # module-level globals at runtime when the user picks another size.
+ loaded_model = whisper.load_model("base")
+ current_size = 'base'
12
+
13
def inference(link):
    """Download a YouTube video, caption it with Whisper, and burn the
    captions into the video.

    Args:
        link: URL of the YouTube video to caption.

    Returns:
        Path to the subtitled output video file (``*_subtitled.mp4``).
    """
    yt = YouTube(link)

    # Audio-only stream feeds the Whisper transcription.
    audio_path = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp4")
    print(f'audio path : {audio_path}')
    # Bug fix: the original kept the StreamQuery object itself and passed it
    # to ffmpeg; an actual mp4 file must be downloaded for the video track.
    video_path = yt.streams.filter(file_extension='mp4')[0].download(filename="video.mp4")

    options = dict(beam_size=5, best_of=5, fp16=False)
    # Bug fix: "inference" is not a valid Whisper task; the valid values are
    # "transcribe" and "translate".
    translate_options = dict(task="translate", **options)
    results = loaded_model.transcribe(audio_path, **translate_options)

    # Derive sibling file names from the audio file's base name.
    path = audio_path.split(".")[0]
    print(path)

    # Write WebVTT captions next to the downloaded audio.
    with open(path + ".vtt", "w") as vtt:
        write_vtt(results["segments"], file=vtt)

    subtitle = path + ".vtt"
    output_video = path + "_subtitled.mp4"

    # Burn the subtitles into the video; -y overwrites output from prior runs.
    os.system(f"ffmpeg -y -i {video_path} -vf subtitles={subtitle} {output_video}")

    return output_video
39
+
40
def change_model(size):
    """Swap the Whisper model used for transcription.

    Args:
        size: Model size name ('base', 'small', 'medium', or 'large').
    """
    # Bug fix: without `global`, the assignments below bound new locals, so
    # the selected model was loaded into a dead variable and never used.
    global loaded_model, current_size
    if size == current_size:
        return  # already loaded; avoid an expensive reload
    loaded_model = whisper.load_model(size)
    current_size = size
45
+
46
def populate_metadata(link):
    """Return the (thumbnail URL, title) pair for a YouTube link."""
    video = YouTube(link)
    return video.thumbnail_url, video.title
49
+
50
title = "Youtube Caption Generator"
description = "Generate captions of Youtube videos using OpenAI's Whisper"

block = gr.Blocks()

with block:
    # Static page header.
    gr.HTML(
        """
        <div style="text-align: center; max-width: 500px; margin: 0 auto;">
        <div>
        <h1>Youtube Caption Generator</h1>
        </div>
        <p style="margin-bottom: 10px; font-size: 94%">
        Generate captions of Youtube videos using OpenAI's Whisper
        </p>
        </div>
        """
    )
    with gr.Group():
        with gr.Box():
            sz = gr.Dropdown(label="Model Size", choices=['base', 'small', 'medium', 'large'], value='base')

            link = gr.Textbox(label="YouTube Link")

            with gr.Row().style(mobile_collapse=False, equal_height=True):
                # Bug fix: renamed from `title`, which shadowed the
                # module-level page-title string above.
                vid_title = gr.Label(label="Video Title", placeholder="Title")
                img = gr.Image(label="Thumbnail")

            op_video = gr.Video()

            with gr.Row().style(mobile_collapse=False, equal_height=True):
                btn = gr.Button("Generate Captions")

            # Events
            btn.click(inference, inputs=[link], outputs=[op_video])
            link.change(populate_metadata, inputs=[link], outputs=[img, vid_title])
            sz.change(change_model, inputs=[sz], outputs=[])

block.launch(debug=True, enable_queue=True)