Alex Volkov committed on
Commit
2e0131e
·
1 Parent(s): 5efed34

Captions API support

Browse files
Files changed (4) hide show
  1. app.py +56 -12
  2. download.py +127 -41
  3. requirements.txt +3 -2
  4. utils/apis.py +6 -5
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio
2
  import gradio as gr
3
 
4
- from download import download_generator
5
  import anvil.media
6
  import os
7
  import dotenv
@@ -24,16 +24,29 @@ preload_model: str = args.get("preload")
24
  LANG_CHOICES = sorted([x.capitalize() for x in LANGUAGES.values()])
25
  LANG_CHOICES.insert(0, "Autodetect")
26
 
27
-
28
- url_input = gr.Textbox(label="Youtube/Twitter/etc video URL (supports many services)", value='https://twitter.com/starsonxh/status/1552945347194142720', lines=1, elem_id="url_input")
 
 
 
 
 
 
 
 
 
 
 
 
29
  # download_status = gr.Textbox(label="Status:", value='', lines=1, elem_id="download_status")
30
  download_status = gr.Checkbox(label="", elem_id="download_status", interactive=False)
31
  translate_action = gr.Checkbox(label="Auto translate to english", elem_id='translate_toggle', interactive=True, value=True)
32
  init_video = gr.Video(label="Upload video manually", visible=True, interactive=True, mirror_webcam=False)
33
  init_audio = gr.Audio(label="Downloaded audio", visible=False)
34
  output_text = gr.Textbox(label="Output text", lines=5, visible=False, max_lines=10, interactive=True, elem_id="output_text")
 
35
  sub_video = gr.Video(label="Subbed video", visible=False, mirror_webcam=False)
36
-
37
 
38
  def predownload(url, translate_action, source_language):
39
  files = []
@@ -54,10 +67,13 @@ def predownload(url, translate_action, source_language):
54
  label=f"Subtitles transcribed from {response['whisper_result'].get('language')} (detected language)")
55
  if 'srt_path' in response:
56
  files.append(response["srt_path"])
 
 
57
 
58
  if 'sub_video' in response:
59
  updates_object[sub_video] = gr.update(visible=True, value=response["sub_video"],
60
  label=f"Subbed video: {meta['id']}_translated.mp4")
 
61
  files.append(response["sub_video"])
62
 
63
  updates_object[output_file] = gr.update(value=files, visible=len(files) > 0, label=f"Output Files")
@@ -105,9 +121,10 @@ with gr.Blocks(css='@import "file=static/css/main.css";', theme='darkpeach', tit
105
 
106
  with gr.Column():
107
  sub_video.render()
 
108
 
109
 
110
- outputs = [download_status, init_video, init_audio, output_text, sub_video, output_file ]
111
  inputs = [url_input, translate_action, source_language]
112
  action_btn.click(fn=predownload, inputs=inputs, outputs=outputs, api_name='predownload')
113
  url_input.submit(fn=predownload, inputs=inputs, outputs=outputs)
@@ -116,7 +133,7 @@ with gr.Blocks(css='@import "file=static/css/main.css";', theme='darkpeach', tit
116
 
117
  translate_action.change(fn=lambda x: {action_btn: gr.update(value=f"Translate" if x else "Transcribe")},
118
  inputs=[translate_action], outputs=[action_btn])
119
-
120
  gr.HTML("""<div class='footer'>
121
  <div class="relative">
122
  <div class="absolute inset-0 flex items-center" aria-hidden="true">
@@ -131,10 +148,40 @@ with gr.Blocks(css='@import "file=static/css/main.css";', theme='darkpeach', tit
131
  </div>""")
132
 
133
  def init_video_manual_upload(url, init_video):
134
- print(url)
135
- print(init_video)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
137
- init_video.change(fn=init_video_manual_upload, inputs=[url_input, init_video], outputs=[])
 
 
138
 
139
  # Render imported buttons for API bindings
140
  render_api_elements(url_input,download_status, output_text, sub_video, output_file)
@@ -142,9 +189,6 @@ with gr.Blocks(css='@import "file=static/css/main.css";', theme='darkpeach', tit
142
  queue_placeholder = demo.queue()
143
 
144
 
145
- @anvil.server.callable
146
- def temp():
147
- return 'temp worked'
148
 
149
  if __name__ == "__main__":
150
  gradio.close_all()
 
1
  import gradio
2
  import gradio as gr
3
 
4
+ from download import download_generator, user_uploaded_video_generator
5
  import anvil.media
6
  import os
7
  import dotenv
 
24
  LANG_CHOICES = sorted([x.capitalize() for x in LANGUAGES.values()])
25
  LANG_CHOICES.insert(0, "Autodetect")
26
 
27
# HTML template for the captioned video player, rendered with str.format().
# Named fields: {src} (video source URL) and {en_vtt} (English captions track URL).
# The literal braces in data-setup are doubled so str.format() emits them as
# '{}' instead of treating them as an (unfilled) positional placeholder.
VIDEO_HTML = """
<video
    class="video-js"
    controls
    preload="auto"
    width="640"
    height="264"
    data-setup='{{}}'>
    <source src="{src}" type="video/mp4">
    <track kind="captions" src="{en_vtt}" srclang="en" label="English" default>
</video>
"""
39
+
40
+ url_input = gr.Textbox(label="Youtube/Twitter/etc video URL (supports many services)", lines=1, elem_id="url_input")
41
  # download_status = gr.Textbox(label="Status:", value='', lines=1, elem_id="download_status")
42
  download_status = gr.Checkbox(label="", elem_id="download_status", interactive=False)
43
  translate_action = gr.Checkbox(label="Auto translate to english", elem_id='translate_toggle', interactive=True, value=True)
44
  init_video = gr.Video(label="Upload video manually", visible=True, interactive=True, mirror_webcam=False)
45
  init_audio = gr.Audio(label="Downloaded audio", visible=False)
46
  output_text = gr.Textbox(label="Output text", lines=5, visible=False, max_lines=10, interactive=True, elem_id="output_text")
47
+ output_text_2 = gr.Textbox(label="Output text 2", lines=5, visible=False, max_lines=10, interactive=True, elem_id="output_text")
48
  sub_video = gr.Video(label="Subbed video", visible=False, mirror_webcam=False)
49
+ sub_video_html = gr.HTML(value=f"<div> Please wait for video to load </div>")
50
 
51
  def predownload(url, translate_action, source_language):
52
  files = []
 
67
  label=f"Subtitles transcribed from {response['whisper_result'].get('language')} (detected language)")
68
  if 'srt_path' in response:
69
  files.append(response["srt_path"])
70
+ if 'vtt_path' in response:
71
+ files.append(response["srt_path"])
72
 
73
  if 'sub_video' in response:
74
  updates_object[sub_video] = gr.update(visible=True, value=response["sub_video"],
75
  label=f"Subbed video: {meta['id']}_translated.mp4")
76
+ updates_object[sub_video_html] = gr.update(value=VIDEO_HTML.format(src=f"file={response['sub_video']}") )
77
  files.append(response["sub_video"])
78
 
79
  updates_object[output_file] = gr.update(value=files, visible=len(files) > 0, label=f"Output Files")
 
121
 
122
  with gr.Column():
123
  sub_video.render()
124
+ sub_video_html.render()
125
 
126
 
127
+ outputs = [download_status, init_video, init_audio, output_text, sub_video, output_file, sub_video_html]
128
  inputs = [url_input, translate_action, source_language]
129
  action_btn.click(fn=predownload, inputs=inputs, outputs=outputs, api_name='predownload')
130
  url_input.submit(fn=predownload, inputs=inputs, outputs=outputs)
 
133
 
134
  translate_action.change(fn=lambda x: {action_btn: gr.update(value=f"Translate" if x else "Transcribe")},
135
  inputs=[translate_action], outputs=[action_btn])
136
+ examples = gr.Examples([["https://twitter.com/starsonxh/status/1552945347194142720", "Adam"], ["https://twitter.com/starsonxh/status/1552945347194142720", "Eve"]], [url_input, output_text] )
137
  gr.HTML("""<div class='footer'>
138
  <div class="relative">
139
  <div class="absolute inset-0 flex items-center" aria-hidden="true">
 
148
  </div>""")
149
 
150
  def init_video_manual_upload(url, init_video):
151
+ if url:
152
+ return False
153
+ files = []
154
+ for response in user_uploaded_video_generator(init_video):
155
+ updates_object = {}
156
+ updates_object[download_status] = gr.update(label=f"{response.get('message')}")
157
+
158
+
159
+
160
+ if 'audio' in response:
161
+ updates_object[init_audio] = gr.update(visible=True, value=response["audio"],
162
+ label=f"Extracted audio")
163
+ files.append(response["audio"])
164
+ files.append(response["video"])
165
+
166
+
167
+ if 'srt_path' in response:
168
+ updates_object[output_text] = gr.update(value=response['srt_path'], visible=True)
169
+ files.append(response["srt_path"])
170
+ updates_object[sub_video_html] = gr.update(value=VIDEO_HTML % f"file={response['sub_video']}")
171
+
172
+ if 'vtt_path' in response:
173
+ updates_object[output_text_2] = gr.update(value=response['vtt_path'], visible=True)
174
+ files.append(response["vtt_path"])
175
+ updates_object[sub_video_html] = gr.update(value=VIDEO_HTML.format(src=f"file={response['sub_video']}", en_vtt=f"file={response['vtt_path']}"))
176
+ #
177
+ # updates_object[output_file] = gr.update(value=files, visible=len(files) > 0, label=f"Output Files")
178
+
179
+ yield updates_object
180
+
181
 
182
+ init_video.change(fn=init_video_manual_upload,
183
+ inputs=[url_input, init_video],
184
+ outputs=[download_status, init_audio, sub_video_html, output_file])
185
 
186
  # Render imported buttons for API bindings
187
  render_api_elements(url_input,download_status, output_text, sub_video, output_file)
 
189
  queue_placeholder = demo.queue()
190
 
191
 
 
 
 
192
 
193
  if __name__ == "__main__":
194
  gradio.close_all()
download.py CHANGED
@@ -1,11 +1,12 @@
 
1
  import sys
2
  import time
3
  from pathlib import Path
4
  import anvil.server
5
  import anvil.media
6
- from whisper.utils import write_srt
7
- from youtube_dl import YoutubeDL
8
- from youtube_dl.utils import DownloadError
9
  import os
10
  import tempfile
11
  import json
@@ -61,6 +62,7 @@ def download_generator(url, translate_action=True, source_language='Autodetect',
61
  raise e
62
 
63
  srt_path = tempdir / f"{meta['id']}.srt"
 
64
 
65
  if not corrected_subtitles:
66
  ### Step 3 : Transcribe with whisper
@@ -70,9 +72,12 @@ def download_generator(url, translate_action=True, source_language='Autodetect',
70
 
71
  with open(srt_path, "w", encoding="utf-8") as srt:
72
  write_srt(whisper_result["segments"], file=srt)
 
 
73
 
74
  whisper_result["srt"] = Path(srt_path).read_text()
75
- yield {"message": f"Transcribe successful", "whisper_result": whisper_result, "meta": meta, "srt_path": srt_path}
 
76
  except Exception as e:
77
  os.chdir(original_dir)
78
  yield {"message": f"{e}"}
@@ -106,51 +111,95 @@ def download_generator(url, translate_action=True, source_language='Autodetect',
106
  yield {"message": f"{e}"}
107
 
108
 
109
- def caption_generator(tweet_url, language="Autodetect", model_size=model_size):
110
- # Download the file
 
 
 
 
 
 
111
 
112
- try:
113
- print(f"Downloading {tweet_url} ")
114
- meta = check_download(tweet_url)
115
- tempdir = output_dir / f"{meta['id']}"
116
- print(f"Downloaded {meta['id']}.mp3 from {meta['uploader_id']} and url {meta['webpage_url']}")
117
- except Exception as e:
118
- print(f"Could not download file: {e}")
119
- raise
120
-
121
- try:
122
- print(f"Starting audio only download with URL {tweet_url}, this may take a while")
123
- meta, video, audio = download(tweet_url, tempdir, keepVideo=False)
124
- print(f"Downloaded video and extracted audio")
125
- except Exception as e:
126
- print(f"Could not download file: {e}")
127
- raise
128
 
129
  # Run whisper on the audio with language unless auto
130
  try:
131
- print(f"Starting whisper transcribe with {meta['id']}.mp3")
132
- transcribe_whisper_result = transcribe(audio, translate_action=False, language=language, override_model_size=model_size)
 
 
133
  detected_language = LANGUAGES[transcribe_whisper_result["language"]]
134
- translate_whisper_result = transcribe(audio, translate_action=True, language=detected_language, override_model_size=model_size)
135
- srt = get_srt(transcribe_whisper_result["segments"])
136
- en_srt = get_srt(translate_whisper_result["segments"])
137
 
138
- print(f"Transcribe successful!")
 
 
 
 
 
 
 
 
139
  except Exception as e:
140
  print(f"Could not transcribe file: {e}")
141
  return
142
 
143
- return_dict = {
144
- "detected_language": LANGUAGES[transcribe_whisper_result["language"]],
145
- "requested_language": language,
146
- "text": transcribe_whisper_result["text"],
147
- "en_text": translate_whisper_result["text"],
148
- "srt": srt,
149
- "en_srt": en_srt,
150
- "meta": meta,
151
- }
152
- return return_dict
153
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
 
155
  # Run whisper with translation task enabled (and save to different srt file)
156
  # Call anvil background task with both files, and both the plain texts
@@ -164,7 +213,7 @@ def progress_hook(d):
164
  print(filename)
165
  yield f"Downloaded {filename}"
166
 
167
- def download(url, tempdir, format="bestvideo[ext=mp4]+bestaudio/best", verbose=False, keepVideo=True):
168
  try:
169
  ydl_opts = {
170
  "format": format,
@@ -175,10 +224,10 @@ def download(url, tempdir, format="bestvideo[ext=mp4]+bestaudio/best", verbose=F
175
  'preferredquality': '192',
176
  }],
177
  "skip_download": False,
178
- "outtmpl": f"{tempdir}/%(id)s.%(ext)s",
179
  "noplaylist": True,
180
  "verbose": verbose,
181
- "quiet": True,
182
  "progress_hooks": [progress_hook],
183
 
184
  }
@@ -197,6 +246,35 @@ def download(url, tempdir, format="bestvideo[ext=mp4]+bestaudio/best", verbose=F
197
  else:
198
  return meta, None, str(audio.resolve())
199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
 
201
  def check_download(url):
202
  ydl_opts = {
@@ -217,6 +295,14 @@ def check_download(url):
217
  return meta
218
 
219
  def transcribe(audio, translate_action=True, language='Autodetect', override_model_size=''):
 
 
 
 
 
 
 
 
220
  task = "translate" if translate_action else "transcribe"
221
  model_size_to_load = override_model_size if override_model_size else model_size
222
  print(f'Starting {task} with whisper size {model_size_to_load} on {audio}')
 
1
+ import shutil
2
  import sys
3
  import time
4
  from pathlib import Path
5
  import anvil.server
6
  import anvil.media
7
+ from whisper.utils import write_srt, write_vtt
8
+ from yt_dlp import YoutubeDL
9
+ from yt_dlp.utils import DownloadError
10
  import os
11
  import tempfile
12
  import json
 
62
  raise e
63
 
64
  srt_path = tempdir / f"{meta['id']}.srt"
65
+ vtt_path = tempdir / f"{meta['id']}.vtt"
66
 
67
  if not corrected_subtitles:
68
  ### Step 3 : Transcribe with whisper
 
72
 
73
  with open(srt_path, "w", encoding="utf-8") as srt:
74
  write_srt(whisper_result["segments"], file=srt)
75
+ with open(vtt_path, "w", encoding="utf-8") as vtt:
76
+ write_vtt(whisper_result["segments"], file=vtt)
77
 
78
  whisper_result["srt"] = Path(srt_path).read_text()
79
+ whisper_result["vtt"] = Path(vtt_path).read_text()
80
+ yield {"message": f"Transcribe successful", "whisper_result": whisper_result, "meta": meta, "srt_path": srt_path, "vtt_path": vtt_path}
81
  except Exception as e:
82
  os.chdir(original_dir)
83
  yield {"message": f"{e}"}
 
111
  yield {"message": f"{e}"}
112
 
113
 
114
def user_uploaded_video_generator(video, translate_action=True, source_language='Autodetect', corrected_subtitles=None):
    """Copy an uploaded video, extract its audio and transcribe it, yielding progress.

    Generator: every yielded dict has a ``"message"`` key; some also carry
    ``"video"`` and ``"audio"`` paths.

    :param video: path of the uploaded video file.
    :param translate_action: currently unused by this function.
    :param source_language: currently unused; transcription always autodetects.
    :param corrected_subtitles: currently unused by this function.
    """
    video_name = Path(video).stem
    # Work inside a per-video directory under output_dir.
    tempdir = output_dir / video_name
    tempdir.mkdir(parents=True, exist_ok=True)
    video_path = tempdir / Path(video).name
    shutil.copy2(video, video_path)

    yield {"message": f"Extracting audio from {video_name}", "video": video_path}
    output_audio = tempdir / f"{video_name}.mp3"
    # tempdir is reused (exist_ok=True), so a previous run may have left an mp3
    # behind; without overwrite_output ffmpeg refuses to replace it.
    ffmpeg.input(video_path).output(filename=output_audio).run(overwrite_output=True)
    yield {"message": f"Got audio from {video_name}", "video": video, "audio": output_audio}

    # Run whisper on the audio with language unless auto
    try:
        audio_file = output_audio
        print(f"Starting whisper transcribe with {output_audio}")
        transcribe_whisper_result = transcribe(audio_file, translate_action=False, language='Autodetect', override_model_size=model_size)
        # The next step translates *from* the detected language *to* English.
        yield {"message": f"Finished transcription, starting translation from {transcribe_whisper_result['language']} to English"}
        detected_language = LANGUAGES[transcribe_whisper_result["language"]]
        translate_whisper_result = transcribe(audio_file, translate_action=True, language=detected_language, override_model_size=model_size)
        yield {"message": f"Finished translation to English, preparing subtitle files"}

        with open(tempdir / f"{video_name}.vtt", "w", encoding="utf-8") as vtt:
            write_vtt(transcribe_whisper_result['segments'], file=vtt)

        # yield {"message": f"Created VTT files", "vtt_path": f"{video_name}.vtt", "vtt_en_path": f"{video_name}.en.vtt"}
        # write_srt(transcribe_whisper_result['segments'], tempdir / f"{video_name}.srt")
        # write_srt(translate_whisper_result['segments'], tempdir / f"{video_name}_en.srt")
        # yield {"message": f"Created SRT files", "srt_path": f"{video_name}.srt", "srt_en_path": f"{video_name}.en.srt"}
        # print(f"Transcribe successful!")
    except Exception as e:
        # Best effort: log and stop the generator instead of propagating.
        print(f"Could not transcribe file: {e}")
        return
151
 
152
def caption_generator(social_media_url,uid, language="Autodetect", model_size=model_size):
    """Download audio for a URL and build original-language and English VTT captions.

    :param social_media_url: URL passed to ``download_audio``.
    :param uid: identifier used for the downloaded audio file name and for the
        names of the returned caption blobs.
    :param language: language passed to the first (transcribe) whisper run.
    :param model_size: whisper model size override passed to ``transcribe``.
    :return: ``('success', captions)`` where ``captions`` is a list of two dicts
        with ``language_tag`` and ``vtt_file`` (an ``anvil.BlobMedia``);
        ``None`` when transcription fails.
    :raises Exception: re-raises whatever ``download_audio`` raises.
    """
    with tempfile.TemporaryDirectory() as tempdir:
        tempdir = Path(tempdir)

        try:
            print(f"Starting audio only download with URL {social_media_url}, this may take a while")
            _meta, audio = download_audio(social_media_url, tempdir, id=uid)
            print(f"Downloaded video and extracted audio")
        except Exception as e:
            print(f"Could not download file: {e}")
            raise

        # Run whisper on the audio with language unless auto
        try:
            print(f"Starting whisper transcribe with {uid}.mp3")
            transcribe_whisper_result = transcribe(audio, translate_action=False, language=language, override_model_size=model_size)
            language_tag = transcribe_whisper_result["language"]
            detected_language = LANGUAGES[language_tag]
            translate_whisper_result = transcribe(audio, translate_action=True, language=detected_language, override_model_size=model_size)
            print(f"Transcribe successful!, writing files")
            # Distinct file names so the two files cannot collide when the
            # detected language is itself "en".
            vtt_path = tempdir / f"transcribed.{language_tag}.vtt"
            en_vtt_path = tempdir / "translated.en.vtt"

            with open(vtt_path.resolve(), "w", encoding="utf-8") as vtt:
                write_vtt(transcribe_whisper_result["segments"], file=vtt)

            # The English track comes from the *translate* run, not the transcription.
            with open(en_vtt_path.resolve(), "w", encoding="utf-8") as en_vtt:
                write_vtt(translate_whisper_result["segments"], file=en_vtt)

        except Exception as e:
            # NOTE: callers receive None here, not a (status, captions) tuple.
            print(f"Could not transcribe file: {e}")
            return

        # Read the files while the temporary directory still exists.
        whisper_result_captions = [
            {
                "language_tag": language_tag,
                "vtt_file": anvil.BlobMedia(content_type="text/plain", content=vtt_path.read_bytes(), name=f"{uid}.{language_tag}.vtt")
            },
            {
                "language_tag": "en",
                "vtt_file": anvil.BlobMedia(content_type="text/plain", content=en_vtt_path.read_bytes(), name=f"{uid}.en.vtt")
            }
        ]

        return 'success', whisper_result_captions
203
 
204
  # Run whisper with translation task enabled (and save to different srt file)
205
  # Call anvil background task with both files, and both the plain texts
 
213
  print(filename)
214
  yield f"Downloaded {filename}"
215
 
216
+ def download(url, tempdir, format="bestvideo[ext=mp4]+bestaudio/best", verbose=False, keepVideo=True, filename="%(id)s.%(ext)s"):
217
  try:
218
  ydl_opts = {
219
  "format": format,
 
224
  'preferredquality': '192',
225
  }],
226
  "skip_download": False,
227
+ "outtmpl": f"{tempdir}/{filename}",
228
  "noplaylist": True,
229
  "verbose": verbose,
230
+ "quiet": False,
231
  "progress_hooks": [progress_hook],
232
 
233
  }
 
246
  else:
247
  return meta, None, str(audio.resolve())
248
 
249
def download_audio(url, tempdir, format="bestaudio/best", verbose=False, id=None):
    """Download a URL with YoutubeDL and extract its audio track as an mp3.

    :param url: URL handed to ``YoutubeDL.extract_info``.
    :param tempdir: target directory; must support the ``/`` operator
        (it is joined with the mp3 file name below).
    :param format: format selector forwarded to the downloader.
    :param verbose: forwarded to the downloader's ``verbose`` option.
    :param id: base name (without extension) of the output file.
    :return: ``(meta, audio_path)`` - the info returned by ``extract_info`` and
        the resolved mp3 path as a string.
    :raises DownloadError: propagated unchanged from the downloader.
    """
    output_template = f"{tempdir}/{id}.%(ext)s"
    mp3_extractor = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }
    options = {
        "format": format,
        "keepvideo": False,
        'postprocessors': [mp3_extractor],
        "skip_download": False,
        "outtmpl": output_template,
        "noplaylist": True,
        "verbose": verbose,
        "quiet": False,
        "progress_hooks": [progress_hook],
    }

    try:
        downloader = YoutubeDL(options)
        info = downloader.extract_info(url, download=True)
    except DownloadError:
        # Surface download failures to the caller untouched.
        raise

    mp3_path = tempdir / f"{id}.mp3"
    return info, str(mp3_path.resolve())
278
 
279
  def check_download(url):
280
  ydl_opts = {
 
295
  return meta
296
 
297
  def transcribe(audio, translate_action=True, language='Autodetect', override_model_size=''):
298
+ """
299
+ Transcribe audio file with whisper
300
+ :param audio: - The audio file to transcribe
301
+ :param translate_action: Bool - Whether to translate to English or keep original language
302
+ :param language: String - The language to transcribe to, default is Autodetect
303
+ :param override_model_size: Bool - Whether to override the model size
304
+ :return:
305
+ """
306
  task = "translate" if translate_action else "transcribe"
307
  model_size_to_load = override_model_size if override_model_size else model_size
308
  print(f'Starting {task} with whisper size {model_size_to_load} on {audio}')
requirements.txt CHANGED
@@ -1,8 +1,9 @@
1
- youtube-dl==2021.12.17
2
  whisper @ git+https://github.com/openai/whisper.git@main#egg=whisper==1.1.5
3
  anvil-uplink==0.4.0
4
  gradio==3.4.0
5
  python-dotenv==0.21.0
6
  aiohttp==3.8.3
7
  aiohttp-requests==0.1.3
8
- fsspec=2022.8.2
 
 
1
+ youtube-dl==2021.12.17 #remove this, moved to yt-dlp
2
  whisper @ git+https://github.com/openai/whisper.git@main#egg=whisper==1.1.5
3
  anvil-uplink==0.4.0
4
  gradio==3.4.0
5
  python-dotenv==0.21.0
6
  aiohttp==3.8.3
7
  aiohttp-requests==0.1.3
8
+ fsspec==2022.8.2
9
+ yt-dlp==2022.10.4
utils/apis.py CHANGED
@@ -15,7 +15,7 @@ from download import download_generator, caption_generator
15
 
16
  dotenv.load_dotenv()
17
 
18
-
19
  @anvil.server.callable
20
  def call_gradio_api(api_name='test_api', data=()):
21
  port = os.environ.get('SERVER_PORT', 8111)
@@ -64,16 +64,16 @@ def test_api(url=''):
64
  return f"I've slept for 15 seconds and now I'm done. "
65
 
66
  #TODO: add telegram error handler here
67
- def caption(tweet_url="", language="Autodetect", override_model_size=""):
68
  """
69
  :param media_id: The twitter media ID object
70
  :param user_id_str: The twitter user ID string
71
  :param tweet_url: tweet URL can potentially not exist in the future, so we can upload on behalf of the user
72
  :return:
73
  """
74
- response = caption_generator(tweet_url, language, override_model_size)
75
- return json.dumps(response)
76
-
77
 
78
  def render_api_elements(url_input, download_status, output_text, sub_video, output_file):
79
  with gr.Group(elem_id='fake_ass_group') as api_buttons:
@@ -97,6 +97,7 @@ def render_api_elements(url_input, download_status, output_text, sub_video, outp
97
  fn=caption,
98
  inputs=[
99
  gr.Text(label='tweet_url'),
 
100
  gr.Text(label='language (optional)'),
101
  gr.Dropdown(label='Model Size', choices=['base', 'tiny', 'small', 'medium', 'large']),
102
  ],
 
15
 
16
  dotenv.load_dotenv()
17
 
18
+ @anvil.server.background_task
19
  @anvil.server.callable
20
  def call_gradio_api(api_name='test_api', data=()):
21
  port = os.environ.get('SERVER_PORT', 8111)
 
64
  return f"I've slept for 15 seconds and now I'm done. "
65
 
66
  #TODO: add telegram error handler here
67
def caption(downloadable_url="",uid="", language="Autodetect", override_model_size=""):
    """Generate captions for a media URL and hand them to a background task.

    :param downloadable_url: URL forwarded to ``caption_generator``.
    :param uid: media identifier forwarded to ``caption_generator`` and to the
        ``add_captions_to_video`` background task.
    :param language: language forwarded to ``caption_generator``.
    :param override_model_size: model size forwarded to ``caption_generator``.
    :return: dict with ``status`` and ``message`` keys.
    """
    result = caption_generator(downloadable_url, uid, language, override_model_size)
    if result is None:
        # caption_generator returns None when transcription fails; report it
        # instead of crashing on tuple unpacking.
        return {'status': 'error', 'message': f'could not generate captions for {uid}'}

    status, whisper_result_captions = result
    anvil.server.launch_background_task('add_captions_to_video', uid, whisper_result_captions)
    return {'status': status, 'message': f'started a background process to upload subtitles to {uid}'}
77
 
78
  def render_api_elements(url_input, download_status, output_text, sub_video, output_file):
79
  with gr.Group(elem_id='fake_ass_group') as api_buttons:
 
97
  fn=caption,
98
  inputs=[
99
  gr.Text(label='tweet_url'),
100
+ gr.Text(label='media_uid'),
101
  gr.Text(label='language (optional)'),
102
  gr.Dropdown(label='Model Size', choices=['base', 'tiny', 'small', 'medium', 'large']),
103
  ],