Spaces:
Build error
Build error
Alex Volkov
committed on
Commit
·
2e0131e
1
Parent(s):
5efed34
Captions API support
Browse files- app.py +56 -12
- download.py +127 -41
- requirements.txt +3 -2
- utils/apis.py +6 -5
app.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import gradio
|
2 |
import gradio as gr
|
3 |
|
4 |
-
from download import download_generator
|
5 |
import anvil.media
|
6 |
import os
|
7 |
import dotenv
|
@@ -24,16 +24,29 @@ preload_model: str = args.get("preload")
|
|
24 |
LANG_CHOICES = sorted([x.capitalize() for x in LANGUAGES.values()])
|
25 |
LANG_CHOICES.insert(0, "Autodetect")
|
26 |
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
# download_status = gr.Textbox(label="Status:", value='', lines=1, elem_id="download_status")
|
30 |
download_status = gr.Checkbox(label="", elem_id="download_status", interactive=False)
|
31 |
translate_action = gr.Checkbox(label="Auto translate to english", elem_id='translate_toggle', interactive=True, value=True)
|
32 |
init_video = gr.Video(label="Upload video manually", visible=True, interactive=True, mirror_webcam=False)
|
33 |
init_audio = gr.Audio(label="Downloaded audio", visible=False)
|
34 |
output_text = gr.Textbox(label="Output text", lines=5, visible=False, max_lines=10, interactive=True, elem_id="output_text")
|
|
|
35 |
sub_video = gr.Video(label="Subbed video", visible=False, mirror_webcam=False)
|
36 |
-
|
37 |
|
38 |
def predownload(url, translate_action, source_language):
|
39 |
files = []
|
@@ -54,10 +67,13 @@ def predownload(url, translate_action, source_language):
|
|
54 |
label=f"Subtitles transcribed from {response['whisper_result'].get('language')} (detected language)")
|
55 |
if 'srt_path' in response:
|
56 |
files.append(response["srt_path"])
|
|
|
|
|
57 |
|
58 |
if 'sub_video' in response:
|
59 |
updates_object[sub_video] = gr.update(visible=True, value=response["sub_video"],
|
60 |
label=f"Subbed video: {meta['id']}_translated.mp4")
|
|
|
61 |
files.append(response["sub_video"])
|
62 |
|
63 |
updates_object[output_file] = gr.update(value=files, visible=len(files) > 0, label=f"Output Files")
|
@@ -105,9 +121,10 @@ with gr.Blocks(css='@import "file=static/css/main.css";', theme='darkpeach', tit
|
|
105 |
|
106 |
with gr.Column():
|
107 |
sub_video.render()
|
|
|
108 |
|
109 |
|
110 |
-
outputs = [download_status, init_video, init_audio, output_text, sub_video, output_file ]
|
111 |
inputs = [url_input, translate_action, source_language]
|
112 |
action_btn.click(fn=predownload, inputs=inputs, outputs=outputs, api_name='predownload')
|
113 |
url_input.submit(fn=predownload, inputs=inputs, outputs=outputs)
|
@@ -116,7 +133,7 @@ with gr.Blocks(css='@import "file=static/css/main.css";', theme='darkpeach', tit
|
|
116 |
|
117 |
translate_action.change(fn=lambda x: {action_btn: gr.update(value=f"Translate" if x else "Transcribe")},
|
118 |
inputs=[translate_action], outputs=[action_btn])
|
119 |
-
|
120 |
gr.HTML("""<div class='footer'>
|
121 |
<div class="relative">
|
122 |
<div class="absolute inset-0 flex items-center" aria-hidden="true">
|
@@ -131,10 +148,40 @@ with gr.Blocks(css='@import "file=static/css/main.css";', theme='darkpeach', tit
|
|
131 |
</div>""")
|
132 |
|
133 |
def init_video_manual_upload(url, init_video):
|
134 |
-
|
135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
|
137 |
-
init_video.change(fn=init_video_manual_upload,
|
|
|
|
|
138 |
|
139 |
# Render imported buttons for API bindings
|
140 |
render_api_elements(url_input,download_status, output_text, sub_video, output_file)
|
@@ -142,9 +189,6 @@ with gr.Blocks(css='@import "file=static/css/main.css";', theme='darkpeach', tit
|
|
142 |
queue_placeholder = demo.queue()
|
143 |
|
144 |
|
145 |
-
@anvil.server.callable
|
146 |
-
def temp():
|
147 |
-
return 'temp worked'
|
148 |
|
149 |
if __name__ == "__main__":
|
150 |
gradio.close_all()
|
|
|
1 |
import gradio
|
2 |
import gradio as gr
|
3 |
|
4 |
+
from download import download_generator, user_uploaded_video_generator
|
5 |
import anvil.media
|
6 |
import os
|
7 |
import dotenv
|
|
|
24 |
LANG_CHOICES = sorted([x.capitalize() for x in LANGUAGES.values()])
|
25 |
LANG_CHOICES.insert(0, "Autodetect")
|
26 |
|
27 |
+
VIDEO_HTML = """
|
28 |
+
<video
|
29 |
+
class="video-js"
|
30 |
+
controls
|
31 |
+
preload="auto"
|
32 |
+
width="640"
|
33 |
+
height="264"
|
34 |
+
data-setup='{}'>
|
35 |
+
<source src="{src}" type="video/mp4">
|
36 |
+
<track kind="captions" src="{en_vtt}" srclang="en" label="English" default>
|
37 |
+
</video>
|
38 |
+
"""
|
39 |
+
|
40 |
+
url_input = gr.Textbox(label="Youtube/Twitter/etc video URL (supports many services)", lines=1, elem_id="url_input")
|
41 |
# download_status = gr.Textbox(label="Status:", value='', lines=1, elem_id="download_status")
|
42 |
download_status = gr.Checkbox(label="", elem_id="download_status", interactive=False)
|
43 |
translate_action = gr.Checkbox(label="Auto translate to english", elem_id='translate_toggle', interactive=True, value=True)
|
44 |
init_video = gr.Video(label="Upload video manually", visible=True, interactive=True, mirror_webcam=False)
|
45 |
init_audio = gr.Audio(label="Downloaded audio", visible=False)
|
46 |
output_text = gr.Textbox(label="Output text", lines=5, visible=False, max_lines=10, interactive=True, elem_id="output_text")
|
47 |
+
output_text_2 = gr.Textbox(label="Output text 2", lines=5, visible=False, max_lines=10, interactive=True, elem_id="output_text")
|
48 |
sub_video = gr.Video(label="Subbed video", visible=False, mirror_webcam=False)
|
49 |
+
sub_video_html = gr.HTML(value=f"<div> Please wait for video to load </div>")
|
50 |
|
51 |
def predownload(url, translate_action, source_language):
|
52 |
files = []
|
|
|
67 |
label=f"Subtitles transcribed from {response['whisper_result'].get('language')} (detected language)")
|
68 |
if 'srt_path' in response:
|
69 |
files.append(response["srt_path"])
|
70 |
+
if 'vtt_path' in response:
|
71 |
+
files.append(response["srt_path"])
|
72 |
|
73 |
if 'sub_video' in response:
|
74 |
updates_object[sub_video] = gr.update(visible=True, value=response["sub_video"],
|
75 |
label=f"Subbed video: {meta['id']}_translated.mp4")
|
76 |
+
updates_object[sub_video_html] = gr.update(value=VIDEO_HTML.format(src=f"file={response['sub_video']}") )
|
77 |
files.append(response["sub_video"])
|
78 |
|
79 |
updates_object[output_file] = gr.update(value=files, visible=len(files) > 0, label=f"Output Files")
|
|
|
121 |
|
122 |
with gr.Column():
|
123 |
sub_video.render()
|
124 |
+
sub_video_html.render()
|
125 |
|
126 |
|
127 |
+
outputs = [download_status, init_video, init_audio, output_text, sub_video, output_file, sub_video_html]
|
128 |
inputs = [url_input, translate_action, source_language]
|
129 |
action_btn.click(fn=predownload, inputs=inputs, outputs=outputs, api_name='predownload')
|
130 |
url_input.submit(fn=predownload, inputs=inputs, outputs=outputs)
|
|
|
133 |
|
134 |
translate_action.change(fn=lambda x: {action_btn: gr.update(value=f"Translate" if x else "Transcribe")},
|
135 |
inputs=[translate_action], outputs=[action_btn])
|
136 |
+
examples = gr.Examples([["https://twitter.com/starsonxh/status/1552945347194142720", "Adam"], ["https://twitter.com/starsonxh/status/1552945347194142720", "Eve"]], [url_input, output_text] )
|
137 |
gr.HTML("""<div class='footer'>
|
138 |
<div class="relative">
|
139 |
<div class="absolute inset-0 flex items-center" aria-hidden="true">
|
|
|
148 |
</div>""")
|
149 |
|
150 |
def init_video_manual_upload(url, init_video):
    """
    Stream UI updates while a manually uploaded video is being processed.

    Generator event handler: every yielded dict maps a Gradio component to
    the ``gr.update`` that should be applied to it.

    :param url: current value of the URL textbox; when it is non-empty the
        upload is ignored and nothing is yielded
    :param init_video: path of the uploaded video, passed unchanged to
        ``user_uploaded_video_generator``; a falsy value (e.g. the video was
        cleared) yields nothing
    """
    if url or not init_video:
        # A generator's return value never reaches the caller, so end plainly.
        return

    def render_player(video_src, captions_src=""):
        # VIDEO_HTML contains a literal "{}" (the data-setup attribute), which
        # str.format would read as a positional field, and it has no "%"
        # fields at all, so the named placeholders are substituted directly.
        track_src = f"file={captions_src}" if captions_src else ""
        return VIDEO_HTML.replace("{src}", f"file={video_src}").replace("{en_vtt}", track_src)

    # Best known playable source; refined as the generator reports paths.
    video_src = init_video

    for response in user_uploaded_video_generator(init_video):
        updates_object = {
            download_status: gr.update(label=f"{response.get('message')}"),
        }

        # Not every response carries a video path, so remember the last one.
        video_src = response.get('sub_video') or response.get('video') or video_src

        if 'audio' in response:
            updates_object[init_audio] = gr.update(visible=True, value=response["audio"],
                                                   label="Extracted audio")

        # NOTE(review): output_text / output_text_2 are updated here but the
        # init_video.change binding lists only download_status, init_audio,
        # sub_video_html and output_file as outputs - confirm they are wired.
        if 'srt_path' in response:
            updates_object[output_text] = gr.update(value=response['srt_path'], visible=True)
            updates_object[sub_video_html] = gr.update(value=render_player(video_src))

        if 'vtt_path' in response:
            updates_object[output_text_2] = gr.update(value=response['vtt_path'], visible=True)
            updates_object[sub_video_html] = gr.update(
                value=render_player(video_src, response['vtt_path']))

        # TODO: output_file is not updated yet for manual uploads.
        yield updates_object
|
180 |
+
|
181 |
|
182 |
+
init_video.change(fn=init_video_manual_upload,
|
183 |
+
inputs=[url_input, init_video],
|
184 |
+
outputs=[download_status, init_audio, sub_video_html, output_file])
|
185 |
|
186 |
# Render imported buttons for API bindings
|
187 |
render_api_elements(url_input,download_status, output_text, sub_video, output_file)
|
|
|
189 |
queue_placeholder = demo.queue()
|
190 |
|
191 |
|
|
|
|
|
|
|
192 |
|
193 |
if __name__ == "__main__":
|
194 |
gradio.close_all()
|
download.py
CHANGED
@@ -1,11 +1,12 @@
|
|
|
|
1 |
import sys
|
2 |
import time
|
3 |
from pathlib import Path
|
4 |
import anvil.server
|
5 |
import anvil.media
|
6 |
-
from whisper.utils import write_srt
|
7 |
-
from
|
8 |
-
from
|
9 |
import os
|
10 |
import tempfile
|
11 |
import json
|
@@ -61,6 +62,7 @@ def download_generator(url, translate_action=True, source_language='Autodetect',
|
|
61 |
raise e
|
62 |
|
63 |
srt_path = tempdir / f"{meta['id']}.srt"
|
|
|
64 |
|
65 |
if not corrected_subtitles:
|
66 |
### Step 3 : Transcribe with whisper
|
@@ -70,9 +72,12 @@ def download_generator(url, translate_action=True, source_language='Autodetect',
|
|
70 |
|
71 |
with open(srt_path, "w", encoding="utf-8") as srt:
|
72 |
write_srt(whisper_result["segments"], file=srt)
|
|
|
|
|
73 |
|
74 |
whisper_result["srt"] = Path(srt_path).read_text()
|
75 |
-
|
|
|
76 |
except Exception as e:
|
77 |
os.chdir(original_dir)
|
78 |
yield {"message": f"{e}"}
|
@@ -106,51 +111,95 @@ def download_generator(url, translate_action=True, source_language='Autodetect',
|
|
106 |
yield {"message": f"{e}"}
|
107 |
|
108 |
|
109 |
-
def
|
110 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
except Exception as e:
|
118 |
-
print(f"Could not download file: {e}")
|
119 |
-
raise
|
120 |
-
|
121 |
-
try:
|
122 |
-
print(f"Starting audio only download with URL {tweet_url}, this may take a while")
|
123 |
-
meta, video, audio = download(tweet_url, tempdir, keepVideo=False)
|
124 |
-
print(f"Downloaded video and extracted audio")
|
125 |
-
except Exception as e:
|
126 |
-
print(f"Could not download file: {e}")
|
127 |
-
raise
|
128 |
|
129 |
# Run whisper on the audio with language unless auto
|
130 |
try:
|
131 |
-
|
132 |
-
|
|
|
|
|
133 |
detected_language = LANGUAGES[transcribe_whisper_result["language"]]
|
134 |
-
translate_whisper_result = transcribe(
|
135 |
-
|
136 |
-
en_srt = get_srt(translate_whisper_result["segments"])
|
137 |
|
138 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
except Exception as e:
|
140 |
print(f"Could not transcribe file: {e}")
|
141 |
return
|
142 |
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
"
|
148 |
-
|
149 |
-
"
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
|
155 |
# Run whisper with translation task enabled (and save to different srt file)
|
156 |
# Call anvil background task with both files, and both the plain texts
|
@@ -164,7 +213,7 @@ def progress_hook(d):
|
|
164 |
print(filename)
|
165 |
yield f"Downloaded {filename}"
|
166 |
|
167 |
-
def download(url, tempdir, format="bestvideo[ext=mp4]+bestaudio/best", verbose=False, keepVideo=True):
|
168 |
try:
|
169 |
ydl_opts = {
|
170 |
"format": format,
|
@@ -175,10 +224,10 @@ def download(url, tempdir, format="bestvideo[ext=mp4]+bestaudio/best", verbose=F
|
|
175 |
'preferredquality': '192',
|
176 |
}],
|
177 |
"skip_download": False,
|
178 |
-
"outtmpl": f"{tempdir}
|
179 |
"noplaylist": True,
|
180 |
"verbose": verbose,
|
181 |
-
"quiet":
|
182 |
"progress_hooks": [progress_hook],
|
183 |
|
184 |
}
|
@@ -197,6 +246,35 @@ def download(url, tempdir, format="bestvideo[ext=mp4]+bestaudio/best", verbose=F
|
|
197 |
else:
|
198 |
return meta, None, str(audio.resolve())
|
199 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
200 |
|
201 |
def check_download(url):
|
202 |
ydl_opts = {
|
@@ -217,6 +295,14 @@ def check_download(url):
|
|
217 |
return meta
|
218 |
|
219 |
def transcribe(audio, translate_action=True, language='Autodetect', override_model_size=''):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
220 |
task = "translate" if translate_action else "transcribe"
|
221 |
model_size_to_load = override_model_size if override_model_size else model_size
|
222 |
print(f'Starting {task} with whisper size {model_size_to_load} on {audio}')
|
|
|
1 |
+
import shutil
|
2 |
import sys
|
3 |
import time
|
4 |
from pathlib import Path
|
5 |
import anvil.server
|
6 |
import anvil.media
|
7 |
+
from whisper.utils import write_srt, write_vtt
|
8 |
+
from yt_dlp import YoutubeDL
|
9 |
+
from yt_dlp.utils import DownloadError
|
10 |
import os
|
11 |
import tempfile
|
12 |
import json
|
|
|
62 |
raise e
|
63 |
|
64 |
srt_path = tempdir / f"{meta['id']}.srt"
|
65 |
+
vtt_path = tempdir / f"{meta['id']}.vtt"
|
66 |
|
67 |
if not corrected_subtitles:
|
68 |
### Step 3 : Transcribe with whisper
|
|
|
72 |
|
73 |
with open(srt_path, "w", encoding="utf-8") as srt:
|
74 |
write_srt(whisper_result["segments"], file=srt)
|
75 |
+
with open(vtt_path, "w", encoding="utf-8") as vtt:
|
76 |
+
write_vtt(whisper_result["segments"], file=vtt)
|
77 |
|
78 |
whisper_result["srt"] = Path(srt_path).read_text()
|
79 |
+
whisper_result["vtt"] = Path(vtt_path).read_text()
|
80 |
+
yield {"message": f"Transcribe successful", "whisper_result": whisper_result, "meta": meta, "srt_path": srt_path, "vtt_path": vtt_path}
|
81 |
except Exception as e:
|
82 |
os.chdir(original_dir)
|
83 |
yield {"message": f"{e}"}
|
|
|
111 |
yield {"message": f"{e}"}
|
112 |
|
113 |
|
114 |
+
def user_uploaded_video_generator(video, translate_action=True, source_language='Autodetect', corrected_subtitles=None):
    """
    Process a locally uploaded video and yield progress dictionaries.

    The video is copied into ``output_dir/<video stem>``, its audio is
    extracted to mp3, the audio is transcribed and (optionally) translated
    to English, and WebVTT files are written next to the copy.

    Every yielded dict has a ``"message"`` key; depending on the stage it may
    also carry ``"video"``, ``"audio"``, ``"vtt_path"`` and ``"vtt_en_path"``
    (all as string paths).

    :param video: path of the uploaded video file
    :param translate_action: when true (default) an English translation is
        produced in addition to the transcription
    :param source_language: language passed to the transcription step,
        default is Autodetect
    :param corrected_subtitles: currently unused
    """
    video_name = Path(video).stem

    # Working directory for this upload; reused if the same file is uploaded again.
    tempdir = output_dir / video_name
    tempdir.mkdir(parents=True, exist_ok=True)
    video_path = tempdir / Path(video).name
    shutil.copy2(video, video_path)

    yield {"message": f"Extracting audio from {video_name}", "video": str(video_path)}

    output_audio = tempdir / f"{video_name}.mp3"
    # overwrite_output: the directory is reused across uploads, so the mp3 may
    # already exist and ffmpeg would otherwise stop to ask for confirmation.
    ffmpeg.input(str(video_path)).output(filename=str(output_audio)).run(overwrite_output=True)
    yield {"message": f"Got audio from {video_name}", "video": str(video_path), "audio": str(output_audio)}

    # Run whisper on the audio with language unless auto
    try:
        audio_file = str(output_audio)
        print(f"Starting whisper transcribe with {output_audio}")
        transcribe_whisper_result = transcribe(audio_file, translate_action=False, language=source_language, override_model_size=model_size)
        language_code = transcribe_whisper_result["language"]

        vtt_path = tempdir / f"{video_name}.vtt"
        with open(vtt_path, "w", encoding="utf-8") as vtt:
            write_vtt(transcribe_whisper_result["segments"], file=vtt)
        subtitles = {"video": str(video_path), "vtt_path": str(vtt_path)}

        if translate_action:
            yield {"message": f"Finished transcription (detected language: {language_code}), starting translation to English"}
            detected_language = LANGUAGES[language_code]
            translate_whisper_result = transcribe(audio_file, translate_action=True, language=detected_language, override_model_size=model_size)
            yield {"message": "Finished translation to English, preparing subtitle files"}

            vtt_en_path = tempdir / f"{video_name}.en.vtt"
            with open(vtt_en_path, "w", encoding="utf-8") as en_vtt:
                write_vtt(translate_whisper_result["segments"], file=en_vtt)
            subtitles["vtt_en_path"] = str(vtt_en_path)

        yield {"message": "Created VTT files", **subtitles}
    except Exception as e:
        print(f"Could not transcribe file: {e}")
        # Surface the failure to the consumer as well, like download_generator does.
        yield {"message": f"Could not transcribe file: {e}"}
        return
|
151 |
|
152 |
+
def caption_generator(social_media_url,uid, language="Autodetect", model_size=model_size):
    """
    Download the audio of a media URL and build WebVTT captions for it.

    The audio is transcribed in its spoken language and translated to
    English; both results are written as VTT files in a temporary directory
    and wrapped in ``anvil.BlobMedia`` objects.

    :param social_media_url: URL handed to ``download_audio``
    :param uid: identifier used for the downloaded file name and for the
        names of the returned VTT blobs
    :param language: language passed to the transcription step, default is Autodetect
    :param model_size: whisper model size passed on as ``override_model_size``
    :return: ``('success', captions)`` where ``captions`` is a list of dicts
        with ``language_tag`` and ``vtt_file`` keys, or ``None`` when
        transcription fails
    :raises Exception: whatever ``download_audio`` raised, after logging it
    """
    with tempfile.TemporaryDirectory() as tempdir:
        tempdir = Path(tempdir)

        try:
            print(f"Starting audio only download with URL {social_media_url}, this may take a while")
            _, audio = download_audio(social_media_url, tempdir, id=uid)
            print(f"Downloaded video and extracted audio")
        except Exception as e:
            print(f"Could not download file: {e}")
            raise

        # Run whisper on the audio with language unless auto
        try:
            print(f"Starting whisper transcribe with {uid}.mp3")
            transcribe_whisper_result = transcribe(audio, translate_action=False, language=language, override_model_size=model_size)
            source_tag = transcribe_whisper_result["language"]
            detected_language = LANGUAGES[source_tag]
            translate_whisper_result = transcribe(audio, translate_action=True, language=detected_language, override_model_size=model_size)
            print(f"Transcribe successful!, writing files")

            # NOTE(review): when the detected tag is "en" both paths are the
            # same file and two "en" entries are returned - confirm intended.
            vtt_path = tempdir / f"{source_tag}.vtt"
            en_vtt_path = tempdir / "en.vtt"

            with open(vtt_path, "w", encoding="utf-8") as vtt:
                write_vtt(transcribe_whisper_result["segments"], file=vtt)

            # The English file must come from the translation pass, not the
            # original-language transcription.
            with open(en_vtt_path, "w", encoding="utf-8") as en_vtt:
                write_vtt(translate_whisper_result["segments"], file=en_vtt)

        except Exception as e:
            print(f"Could not transcribe file: {e}")
            return

        # Read the files while the temporary directory still exists.
        whisper_result_captions = [
            {
                "language_tag": source_tag,
                "vtt_file": anvil.BlobMedia(content_type="text/plain", content=vtt_path.read_bytes(), name=f"{uid}.{source_tag}.vtt")
            },
            {
                "language_tag": "en",
                "vtt_file": anvil.BlobMedia(content_type="text/plain", content=en_vtt_path.read_bytes(), name=f"{uid}.en.vtt")
            }
        ]

        return 'success', whisper_result_captions
|
203 |
|
204 |
# Run whisper with translation task enabled (and save to different srt file)
|
205 |
# Call anvil background task with both files, and both the plain texts
|
|
|
213 |
print(filename)
|
214 |
yield f"Downloaded {filename}"
|
215 |
|
216 |
+
def download(url, tempdir, format="bestvideo[ext=mp4]+bestaudio/best", verbose=False, keepVideo=True, filename="%(id)s.%(ext)s"):
|
217 |
try:
|
218 |
ydl_opts = {
|
219 |
"format": format,
|
|
|
224 |
'preferredquality': '192',
|
225 |
}],
|
226 |
"skip_download": False,
|
227 |
+
"outtmpl": f"{tempdir}/{filename}",
|
228 |
"noplaylist": True,
|
229 |
"verbose": verbose,
|
230 |
+
"quiet": False,
|
231 |
"progress_hooks": [progress_hook],
|
232 |
|
233 |
}
|
|
|
246 |
else:
|
247 |
return meta, None, str(audio.resolve())
|
248 |
|
249 |
+
def download_audio(url, tempdir, format="bestaudio/best", verbose=False, id=None):
    """
    Download only the audio of ``url`` with yt-dlp and convert it to mp3.

    :param url: media URL to download
    :param tempdir: directory (str or Path) the mp3 is written to
    :param format: yt-dlp format selector
    :param verbose: forwarded to yt-dlp's ``verbose`` option
    :param id: file stem for the mp3; when None the id reported by yt-dlp is
        used (same ``%(id)s`` template as ``download``)
    :return: ``(meta, audio_path)`` - the yt-dlp info dict and the absolute
        mp3 path as a string
    :raises DownloadError: propagated unchanged from yt-dlp
    """
    tempdir = Path(tempdir)  # callers may pass a plain string
    stem_template = "%(id)s" if id is None else f"{id}"

    ydl_opts = {
        "format": format,
        "keepvideo": False,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        "skip_download": False,
        "outtmpl": f"{tempdir}/{stem_template}.%(ext)s",
        "noplaylist": True,
        "verbose": verbose,
        "quiet": False,
        "progress_hooks": [progress_hook],
    }

    # The context manager lets yt-dlp release its resources when done.
    with YoutubeDL(ydl_opts) as ydl:
        meta = ydl.extract_info(url, download=True)

    stem = meta["id"] if id is None else id
    audio = tempdir / f"{stem}.mp3"
    return meta, str(audio.resolve())
|
278 |
|
279 |
def check_download(url):
|
280 |
ydl_opts = {
|
|
|
295 |
return meta
|
296 |
|
297 |
def transcribe(audio, translate_action=True, language='Autodetect', override_model_size=''):
|
298 |
+
"""
|
299 |
+
Transcribe audio file with whisper
|
300 |
+
:param audio: - The audio file to transcribe
|
301 |
+
:param translate_action: Bool - Whether to translate to English or keep original language
|
302 |
+
:param language: String - The spoken language of the audio, default is Autodetect
|
303 |
+
:param override_model_size: String - Model size to load instead of the default; an empty string keeps the default
|
304 |
+
:return:
|
305 |
+
"""
|
306 |
task = "translate" if translate_action else "transcribe"
|
307 |
model_size_to_load = override_model_size if override_model_size else model_size
|
308 |
print(f'Starting {task} with whisper size {model_size_to_load} on {audio}')
|
requirements.txt
CHANGED
@@ -1,8 +1,9 @@
|
|
1 |
-
youtube-dl==2021.12.17
|
2 |
whisper @ git+https://github.com/openai/whisper.git@main#egg=whisper==1.1.5
|
3 |
anvil-uplink==0.4.0
|
4 |
gradio==3.4.0
|
5 |
python-dotenv==0.21.0
|
6 |
aiohttp==3.8.3
|
7 |
aiohttp-requests==0.1.3
|
8 |
-
fsspec=2022.8.2
|
|
|
|
1 |
+
youtube-dl==2021.12.17 #remove this, moved to yt-dlp
|
2 |
whisper @ git+https://github.com/openai/whisper.git@main#egg=whisper==1.1.5
|
3 |
anvil-uplink==0.4.0
|
4 |
gradio==3.4.0
|
5 |
python-dotenv==0.21.0
|
6 |
aiohttp==3.8.3
|
7 |
aiohttp-requests==0.1.3
|
8 |
+
fsspec==2022.8.2
|
9 |
+
yt-dlp==2022.10.4
|
utils/apis.py
CHANGED
@@ -15,7 +15,7 @@ from download import download_generator, caption_generator
|
|
15 |
|
16 |
dotenv.load_dotenv()
|
17 |
|
18 |
-
|
19 |
@anvil.server.callable
|
20 |
def call_gradio_api(api_name='test_api', data=()):
|
21 |
port = os.environ.get('SERVER_PORT', 8111)
|
@@ -64,16 +64,16 @@ def test_api(url=''):
|
|
64 |
return f"I've slept for 15 seconds and now I'm done. "
|
65 |
|
66 |
#TODO: add telegram error handler here
|
67 |
-
def caption(
|
68 |
"""
|
69 |
:param media_id: The twitter media ID object
|
70 |
:param user_id_str: The twitter user ID string
|
71 |
:param tweet_url: tweet URL can potentially not exist in the future, so we can upload on behalf of the user
|
72 |
:return:
|
73 |
"""
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
|
78 |
def render_api_elements(url_input, download_status, output_text, sub_video, output_file):
|
79 |
with gr.Group(elem_id='fake_ass_group') as api_buttons:
|
@@ -97,6 +97,7 @@ def render_api_elements(url_input, download_status, output_text, sub_video, outp
|
|
97 |
fn=caption,
|
98 |
inputs=[
|
99 |
gr.Text(label='tweet_url'),
|
|
|
100 |
gr.Text(label='language (optional)'),
|
101 |
gr.Dropdown(label='Model Size', choices=['base', 'tiny', 'small', 'medium', 'large']),
|
102 |
],
|
|
|
15 |
|
16 |
dotenv.load_dotenv()
|
17 |
|
18 |
+
@anvil.server.background_task
|
19 |
@anvil.server.callable
|
20 |
def call_gradio_api(api_name='test_api', data=()):
|
21 |
port = os.environ.get('SERVER_PORT', 8111)
|
|
|
64 |
return f"I've slept for 15 seconds and now I'm done. "
|
65 |
|
66 |
#TODO: add telegram error handler here
|
67 |
+
def caption(downloadable_url="",uid="", language="Autodetect", override_model_size=""):
    """
    Generate captions for a media URL and start the background upload task.

    :param downloadable_url: URL passed to ``caption_generator`` for download
    :param uid: media identifier; passed to ``caption_generator`` and to the
        ``add_captions_to_video`` background task
    :param language: language passed to ``caption_generator``, default is Autodetect
    :param override_model_size: passed to ``caption_generator`` as its model size
    :return: dict with ``status`` and ``message`` keys
    """
    result = caption_generator(downloadable_url, uid, language, override_model_size)
    if result is None:
        # caption_generator returns nothing when transcription fails.
        return {'status': 'error', 'message': f'could not generate captions for {uid}'}

    status, whisper_result_captions = result
    anvil.server.launch_background_task('add_captions_to_video', uid, whisper_result_captions)
    return {'status': status, 'message': f'started a background process to upload subtitles to {uid}'}
|
77 |
|
78 |
def render_api_elements(url_input, download_status, output_text, sub_video, output_file):
|
79 |
with gr.Group(elem_id='fake_ass_group') as api_buttons:
|
|
|
97 |
fn=caption,
|
98 |
inputs=[
|
99 |
gr.Text(label='tweet_url'),
|
100 |
+
gr.Text(label='media_uid'),
|
101 |
gr.Text(label='language (optional)'),
|
102 |
gr.Dropdown(label='Model Size', choices=['base', 'tiny', 'small', 'medium', 'large']),
|
103 |
],
|