mlkorra commited on
Commit
65d0224
·
1 Parent(s): de29f4b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -81
app.py CHANGED
@@ -1,91 +1,65 @@
1
- import gradio as gr
2
- import whisper
3
- from whisper.utils import write_vtt
4
 
5
- from pytube import YouTube
6
  import os
7
  import sys
8
  import subprocess
 
9
 
10
- loaded_model = whisper.load_model("base")
11
- current_size = 'base'
12
-
13
- def inference(link):
14
-
15
- yt = YouTube(link)
16
-
17
- audio_path = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp4")
18
- print(f'audio path : {audio_path}')
19
- video_path = yt.streams.filter(file_extension='mp4')
20
-
21
- #options = whisper.DecodingOptions(without_timestamps=True)
22
- options = dict(beam_size=5, best_of=5, fp16 = False)
23
- translate_options = dict(task="inference", **options)
24
- results = loaded_model.transcribe(audio_path,**translate_options)
25
-
26
- output_dir = ''
27
- path = audio_path.split(".")[0]
28
-
29
- with open(os.path.join(output_dir, path + ".vtt"), "w") as vtt:
30
- write_vtt(results["segments"], file=vtt)
31
-
32
- subtitle = path + ".vtt"
33
- output_video = path + "_subtitled.mp4"
34
-
35
- os.system(f"ffmpeg -i {video_path} -vf subtitles={subtitle} {output_video}")
36
-
37
- return output_video
38
-
39
- def change_model(size):
40
- if size == current_size:
41
- return
42
- loaded_model = whisper.load_model(size)
43
- current_size = size
44
-
45
- def populate_metadata(link):
46
- yt = YouTube(link)
47
- return yt.thumbnail_url, yt.title
48
-
49
- title="Youtube Caption Generator"
50
- description="Generate captions of Youtube videos using OpenAI's Whisper"
51
- block = gr.Blocks()
52
 
 
 
 
 
 
 
 
 
 
 
 
53
  with block:
54
- gr.HTML(
55
- """
56
- <div style="text-align: center; max-width: 500px; margin: 0 auto;">
57
- <div>
58
- <h1>Youtube Caption Generator</h1>
59
- </div>
60
- <p style="margin-bottom: 10px; font-size: 94%">
61
- Generate captions of Youtube videos using OpenAI's Whisper
62
- </p>
63
- </div>
64
- """
65
- )
66
  with gr.Group():
67
  with gr.Box():
68
- sz = gr.Dropdown(label="Model Size", choices=['base','small', 'medium', 'large'], value='base')
69
-
70
- link = gr.Textbox(label="YouTube Link")
71
-
72
- with gr.Row().style(mobile_collapse=False, equal_height=True):
73
- title = gr.Label(label="Video Title", placeholder="Title")
74
- img = gr.Image(label="Thumbnail")
75
-
76
- # text = gr.Textbox(
77
- # label="Transcription",
78
- # placeholder="Transcription Output",
79
- # lines=5)
80
-
81
- op_video = gr.Video()
82
 
83
- with gr.Row().style(mobile_collapse=False, equal_height=True):
84
- btn = gr.Button("Generate Captions")
85
-
86
- # Events
87
- btn.click(inference, inputs=[link], outputs=[op_video])
88
- link.change(populate_metadata, inputs=[link], outputs=[img, title])
89
- sz.change(change_model, inputs=[sz], outputs=[])
90
-
91
- block.launch(debug=True,enable_queue=True)
 
 
 
 
1
 
2
+ import gradio as gr
3
  import os
4
  import sys
5
  import subprocess
6
+ #from moviepy.editor import VideoFileClip
7
 
8
+ import whisper
9
+ from whisper.utils import write_vtt
10
+
11
+ model = whisper.load_model("medium")
12
+
13
+ title = "Add Text/Caption to your YouTube Shorts - MultiLingual"
14
+
15
def video2mp3(video_file, output_ext="mp3"):
    """Extract the audio track of *video_file* into a sibling audio file via ffmpeg.

    Parameters
    ----------
    video_file : str
        Path to the input video.
    output_ext : str
        Extension of the audio output; ffmpeg infers the codec from it.
        Defaults to "mp3".

    Returns
    -------
    str
        Path of the audio file: the input path with its extension swapped
        for ``output_ext``.
    """
    # BUG FIX: the scraped source literally wrote f"(unknown).{output_ext}"
    # (a scrape artifact that clobbered the placeholder); the otherwise-unused
    # `filename` stem makes the intent unambiguous, restored here.
    filename, _ext = os.path.splitext(video_file)
    audio_path = f"{filename}.{output_ext}"
    # -y: overwrite without prompting. Output is discarded, so a failed ffmpeg
    # run is silent — callers only see the (possibly never-written) path.
    subprocess.call(
        ["ffmpeg", "-y", "-i", video_file, audio_path],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.STDOUT,
    )
    return audio_path
21
+
22
+
23
def translate(input_video):
    """Produce an English-subtitled copy of *input_video* with Whisper.

    Pipeline: extract the audio (``video2mp3``) -> run Whisper with the
    ``translate`` task (any source language -> English text) -> write the
    segments as a WebVTT file -> burn the subtitles into a new mp4 via ffmpeg.

    Parameters
    ----------
    input_video : str
        Filesystem path of the uploaded video.

    Returns
    -------
    str
        Path of the subtitled video, ``<stem>_subtitled.mp4``.
    """
    audio_file = video2mp3(input_video)

    # fp16=False keeps inference in float32 (CPU-safe); wider beam/best-of
    # trade speed for transcription quality.
    options = dict(beam_size=5, best_of=5, fp16=False)
    translate_options = dict(task="translate", **options)
    result = model.transcribe(audio_file, **translate_options)

    output_dir = ''
    # NOTE(review): split(".")[0] truncates at the FIRST dot, so "my.clip.mp3"
    # yields stem "my" — kept as-is to preserve the original naming behavior.
    stem = audio_file.split(".")[0]

    subtitle = stem + ".vtt"
    with open(os.path.join(output_dir, subtitle), "w") as vtt:
        write_vtt(result["segments"], file=vtt)

    output_video = stem + "_subtitled.mp4"
    # BUG FIX: paths were interpolated unquoted into the shell command, so any
    # filename containing a space (or shell metacharacter) broke the burn-in
    # step. Quote all three paths for the shell and the subtitles filter.
    os.system(
        f"ffmpeg -i '{input_video}' -vf \"subtitles='{subtitle}'\" '{output_video}'"
    )

    return output_video
43
+
44
# ---------------------------------------------------------------------------
# Gradio UI: a single input-video widget, an output-video widget, and one
# button that runs the Whisper subtitle pipeline.
# ---------------------------------------------------------------------------
block = gr.Blocks()

with block:
    with gr.Group():
        with gr.Box():
            with gr.Row().style():
                # type="filepath" hands translate() a path on local disk;
                # mirror_webcam=False keeps webcam captures un-flipped.
                inp_video = gr.Video(
                    label="Input Video",
                    type="filepath",
                    mirror_webcam=False,
                )
            # NOTE(review): the flattened diff makes the exact nesting of the
            # output widget and button ambiguous (Row vs. Box); placed in the
            # Box here — confirm against the rendered Space.
            op_video = gr.Video()
            btn = gr.Button("Generate Subtitle Video")

    # Wire the button to the subtitle pipeline.
    btn.click(translate, inputs=[inp_video], outputs=[op_video])

# Queueing serializes requests so long Whisper runs don't time out the client.
block.launch(enable_queue=True)