Change UI
Browse files
- app.py +80 -86
- requirements.txt +0 -11
- transcribe.py +5 -1
app.py
CHANGED
@@ -26,7 +26,7 @@ if DEVICE == "cpu":
|
|
26 |
# I suppose that I am on the Hugging Face server
|
27 |
SECONDS = 300
|
28 |
else:
|
29 |
-
SECONDS =
|
30 |
|
31 |
YOUTUBE = "youtube"
|
32 |
TWITCH = "twitch"
|
@@ -231,8 +231,9 @@ def clear_video_url():
|
|
231 |
image = gr.Image(visible=visible, scale=1)
|
232 |
source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
|
233 |
target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
|
|
|
|
|
234 |
translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=visible)
|
235 |
-
original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False)
|
236 |
original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
|
237 |
original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
|
238 |
return (
|
@@ -240,8 +241,8 @@ def clear_video_url():
|
|
240 |
image,
|
241 |
source_languaje,
|
242 |
target_languaje,
|
|
|
243 |
translate_button,
|
244 |
-
original_audio,
|
245 |
original_audio_transcribed,
|
246 |
original_audio_translated,
|
247 |
)
|
@@ -260,10 +261,9 @@ def is_valid_youtube_url(url):
|
|
260 |
def is_valid_url(url):
|
261 |
source_languaje = gr.Dropdown(visible=True, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
|
262 |
target_languaje = gr.Dropdown(visible=True, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=True)
|
267 |
subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=True, interactive=False)
|
268 |
|
269 |
# Youtube
|
@@ -275,24 +275,18 @@ def is_valid_url(url):
|
|
275 |
gr.Image(value=thumbnail, visible=True, show_download_button=False, container=False),
|
276 |
source_languaje,
|
277 |
target_languaje,
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
original_audio_transcribed,
|
282 |
-
original_audio_translated,
|
283 |
-
subtitled_video
|
284 |
)
|
285 |
else:
|
286 |
return (
|
287 |
gr.Image(value="assets/youtube-no-thumbnails.webp", visible=True, show_download_button=False, container=False),
|
288 |
source_languaje,
|
289 |
target_languaje,
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
original_audio_transcribed,
|
294 |
-
original_audio_translated,
|
295 |
-
subtitled_video
|
296 |
)
|
297 |
|
298 |
# Twitch
|
@@ -301,12 +295,9 @@ def is_valid_url(url):
|
|
301 |
gr.Image(value="assets/twitch.webp", visible=True, show_download_button=False, container=False),
|
302 |
source_languaje,
|
303 |
target_languaje,
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
original_audio_transcribed,
|
308 |
-
original_audio_translated,
|
309 |
-
subtitled_video
|
310 |
)
|
311 |
|
312 |
# Error
|
@@ -314,49 +305,40 @@ def is_valid_url(url):
|
|
314 |
image = gr.Image(value="assets/youtube_error.webp", visible=visible, show_download_button=False, container=False)
|
315 |
source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
|
316 |
target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
|
321 |
-
original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
|
322 |
-
subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=False)
|
323 |
return (
|
324 |
image,
|
325 |
source_languaje,
|
326 |
target_languaje,
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
original_audio_transcribed,
|
331 |
-
original_audio_translated,
|
332 |
-
subtitled_video
|
333 |
)
|
334 |
|
335 |
-
def get_audio_and_video_from_video(url
|
336 |
python_file = "download.py"
|
337 |
command = f"python {python_file} {url}"
|
338 |
os.system(command)
|
339 |
-
|
340 |
|
341 |
audio = "audios/download_audio.mp3"
|
342 |
video = "videos/download_video.mp4"
|
343 |
|
344 |
return (
|
345 |
-
gr.
|
346 |
gr.Textbox(value=audio, label="Original audio path", elem_id="original_audio_path", visible=False),
|
347 |
gr.Textbox(value=video, label="Original video path", elem_id="original_video_path", visible=False)
|
348 |
)
|
349 |
|
350 |
-
def
|
351 |
folder_vocals = "vocals"
|
352 |
folder_chunck = "chunks"
|
353 |
-
folder_concatenated = "concatenated_transcriptions"
|
354 |
if not os.path.exists(folder_vocals):
|
355 |
os.makedirs(folder_vocals)
|
356 |
if not os.path.exists(folder_chunck):
|
357 |
os.makedirs(folder_chunck)
|
358 |
-
if not os.path.exists(folder_concatenated):
|
359 |
-
os.makedirs(folder_concatenated)
|
360 |
python_file = "slice_audio.py"
|
361 |
command = f"python {python_file} {audio_path} {SECONDS}"
|
362 |
os.system(command)
|
@@ -365,7 +347,14 @@ def trascribe_audio(audio_path, source_languaje):
|
|
365 |
f.write(str(0))
|
366 |
command = f"mv {folder_chunck}/*.mp3 {folder_vocals}/"
|
367 |
os.system(command)
|
|
|
|
|
|
|
|
|
|
|
368 |
|
|
|
|
|
369 |
python_file = "transcribe.py"
|
370 |
chunck_file = "chunks/output_files.txt"
|
371 |
speakers_file = "vocals/speakers.txt"
|
@@ -393,6 +382,16 @@ def trascribe_audio(audio_path, source_languaje):
|
|
393 |
command = f"rm {vocal}"
|
394 |
os.system(command)
|
395 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
396 |
python_file = "concat_transcriptions.py"
|
397 |
command = f"python {python_file} {chunck_file} {SECONDS} {speakers_file}"
|
398 |
os.system(command)
|
@@ -411,8 +410,8 @@ def trascribe_audio(audio_path, source_languaje):
|
|
411 |
result = f.read()
|
412 |
|
413 |
return (
|
414 |
-
|
415 |
-
gr.Textbox(value=audio_transcribed, label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False)
|
416 |
)
|
417 |
|
418 |
def translate_transcription(original_audio_transcribed_path, source_languaje, target_languaje):
|
@@ -431,7 +430,7 @@ def translate_transcription(original_audio_transcribed_path, source_languaje, ta
|
|
431 |
os.system(command)
|
432 |
|
433 |
return (
|
434 |
-
|
435 |
gr.Textbox(value=translated_transcription, label="Original audio translated", elem_id="original_audio_translated", visible=False)
|
436 |
)
|
437 |
|
@@ -458,56 +457,52 @@ def add_translated_subtitles_to_video(original_video_path, original_audio_path,
|
|
458 |
def subtify():
|
459 |
with gr.Blocks() as demo:
|
460 |
# Layout
|
|
|
461 |
gr.Markdown("""# Subtify""")
|
462 |
-
gr.Markdown(f"translate, Python: {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}")
|
463 |
-
# model = transformers.AutoModel.from_pretrained("huggingface/my_model")
|
464 |
-
# gr.Markdown(f"model.config.url: {model.config.url}")
|
465 |
-
token = os.getenv("HF_TOKEN")
|
466 |
-
if token is not None:
|
467 |
-
print(token)
|
468 |
-
gr.Markdown(f"Huggingface token: {token}")
|
469 |
-
else:
|
470 |
-
gr.Markdown(f"Huggingface token: None")
|
471 |
with gr.Row(variant="panel"):
|
472 |
url_textbox = gr.Textbox(placeholder="Add video URL here", label="Video URL", elem_id="video_url", scale=1, interactive=True)
|
473 |
-
copy_button = gr.Button(size="sm", icon="icons/copy.svg", value="", min_width="10px", scale=0)
|
474 |
-
delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="", min_width="10px", scale=0)
|
475 |
|
476 |
-
stream_page = gr.Textbox(label="Stream page", elem_id="stream_page", visible=False)
|
477 |
visible = False
|
478 |
with gr.Row(equal_height=False):
|
479 |
image = gr.Image(visible=visible, scale=1)
|
480 |
with gr.Column():
|
481 |
with gr.Row():
|
482 |
-
source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
|
483 |
-
target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
|
|
|
484 |
with gr.Row():
|
485 |
subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=visible)
|
486 |
|
487 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
488 |
original_audio_path = gr.Textbox(label="Original audio path", elem_id="original_audio_path", visible=False)
|
489 |
original_video_path = gr.Textbox(label="Original video path", elem_id="original_video_path", visible=False)
|
490 |
-
original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
|
491 |
original_audio_transcribed_path = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False)
|
492 |
-
original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
|
493 |
original_audio_translated_path = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", visible=False)
|
494 |
subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=False)
|
495 |
|
496 |
# Events
|
497 |
# copy_button.click(fn=copy_url_from_clipboard, outputs=url_textbox)
|
498 |
-
delete_button.click(
|
499 |
-
|
500 |
-
|
501 |
-
|
502 |
-
|
503 |
-
|
504 |
-
|
505 |
-
|
506 |
-
|
507 |
-
|
508 |
-
|
509 |
-
|
510 |
-
)
|
511 |
url_textbox.change(
|
512 |
fn=is_valid_url,
|
513 |
inputs=url_textbox,
|
@@ -515,18 +510,17 @@ def subtify():
|
|
515 |
image,
|
516 |
source_languaje,
|
517 |
target_languaje,
|
|
|
518 |
subtify_button,
|
519 |
-
|
520 |
-
original_audio,
|
521 |
-
original_audio_transcribed,
|
522 |
-
original_audio_translated,
|
523 |
-
subtitled_video
|
524 |
]
|
525 |
)
|
526 |
-
subtify_button.click(fn=get_audio_and_video_from_video, inputs=[url_textbox
|
527 |
-
|
528 |
-
|
529 |
-
|
|
|
|
|
530 |
|
531 |
demo.launch()
|
532 |
|
|
|
26 |
# I suppose that I am on the Hugging Face server
|
27 |
SECONDS = 300
|
28 |
else:
|
29 |
+
SECONDS = 300
|
30 |
|
31 |
YOUTUBE = "youtube"
|
32 |
TWITCH = "twitch"
|
|
|
231 |
image = gr.Image(visible=visible, scale=1)
|
232 |
source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
|
233 |
target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
|
234 |
+
num_speaker = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
|
235 |
+
number_of_speakers = gr.Dropdown(visible=visible, label="Number of speakers", show_label=True, value=10, choices=num_speaker, scale=1, interactive=True)
|
236 |
translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=visible)
|
|
|
237 |
original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
|
238 |
original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
|
239 |
return (
|
|
|
241 |
image,
|
242 |
source_languaje,
|
243 |
target_languaje,
|
244 |
+
number_of_speakers,
|
245 |
translate_button,
|
|
|
246 |
original_audio_transcribed,
|
247 |
original_audio_translated,
|
248 |
)
|
|
|
261 |
def is_valid_url(url):
|
262 |
source_languaje = gr.Dropdown(visible=True, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
|
263 |
target_languaje = gr.Dropdown(visible=True, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
|
264 |
+
num_speaker = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
|
265 |
+
number_of_speakers = gr.Dropdown(visible=True, label="Number of speakers", show_label=True, value=10, choices=num_speaker, scale=1, interactive=True)
|
266 |
+
subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=True)
|
|
|
267 |
subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=True, interactive=False)
|
268 |
|
269 |
# Youtube
|
|
|
275 |
gr.Image(value=thumbnail, visible=True, show_download_button=False, container=False),
|
276 |
source_languaje,
|
277 |
target_languaje,
|
278 |
+
number_of_speakers,
|
279 |
+
subtify_button,
|
280 |
+
subtitled_video,
|
|
|
|
|
|
|
281 |
)
|
282 |
else:
|
283 |
return (
|
284 |
gr.Image(value="assets/youtube-no-thumbnails.webp", visible=True, show_download_button=False, container=False),
|
285 |
source_languaje,
|
286 |
target_languaje,
|
287 |
+
number_of_speakers,
|
288 |
+
subtify_button,
|
289 |
+
subtitled_video,
|
|
|
|
|
|
|
290 |
)
|
291 |
|
292 |
# Twitch
|
|
|
295 |
gr.Image(value="assets/twitch.webp", visible=True, show_download_button=False, container=False),
|
296 |
source_languaje,
|
297 |
target_languaje,
|
298 |
+
number_of_speakers,
|
299 |
+
subtify_button,
|
300 |
+
subtitled_video,
|
|
|
|
|
|
|
301 |
)
|
302 |
|
303 |
# Error
|
|
|
305 |
image = gr.Image(value="assets/youtube_error.webp", visible=visible, show_download_button=False, container=False)
|
306 |
source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
|
307 |
target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
|
308 |
+
number_of_speakers = gr.Dropdown(visible=visible, label="Number of speakers", show_label=True, value=10, choices=num_speaker, scale=1, interactive=True)
|
309 |
+
subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=visible)
|
310 |
+
subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=False),
|
|
|
|
|
|
|
311 |
return (
|
312 |
image,
|
313 |
source_languaje,
|
314 |
target_languaje,
|
315 |
+
number_of_speakers,
|
316 |
+
subtify_button,
|
317 |
+
subtitled_video,
|
|
|
|
|
|
|
318 |
)
|
319 |
|
320 |
+
def get_audio_and_video_from_video(url):
|
321 |
python_file = "download.py"
|
322 |
command = f"python {python_file} {url}"
|
323 |
os.system(command)
|
324 |
+
sleep(5)
|
325 |
|
326 |
audio = "audios/download_audio.mp3"
|
327 |
video = "videos/download_video.mp4"
|
328 |
|
329 |
return (
|
330 |
+
gr.Textbox(value="Ok", label="Video downloaded", elem_id="video_downloaded", interactive=False, visible=True),
|
331 |
gr.Textbox(value=audio, label="Original audio path", elem_id="original_audio_path", visible=False),
|
332 |
gr.Textbox(value=video, label="Original video path", elem_id="original_video_path", visible=False)
|
333 |
)
|
334 |
|
335 |
+
def slice_audio(audio_path):
|
336 |
folder_vocals = "vocals"
|
337 |
folder_chunck = "chunks"
|
|
|
338 |
if not os.path.exists(folder_vocals):
|
339 |
os.makedirs(folder_vocals)
|
340 |
if not os.path.exists(folder_chunck):
|
341 |
os.makedirs(folder_chunck)
|
|
|
|
|
342 |
python_file = "slice_audio.py"
|
343 |
command = f"python {python_file} {audio_path} {SECONDS}"
|
344 |
os.system(command)
|
|
|
347 |
f.write(str(0))
|
348 |
command = f"mv {folder_chunck}/*.mp3 {folder_vocals}/"
|
349 |
os.system(command)
|
350 |
+
# sleep(5)
|
351 |
+
|
352 |
+
return (
|
353 |
+
gr.Textbox(value="Ok", label="Video sliced", elem_id="video_sliced", interactive=False, visible=True)
|
354 |
+
)
|
355 |
|
356 |
+
def trascribe_audio(source_languaje):
|
357 |
+
folder_vocals = "vocals"
|
358 |
python_file = "transcribe.py"
|
359 |
chunck_file = "chunks/output_files.txt"
|
360 |
speakers_file = "vocals/speakers.txt"
|
|
|
382 |
command = f"rm {vocal}"
|
383 |
os.system(command)
|
384 |
|
385 |
+
return (
|
386 |
+
gr.Textbox(value="Ok", label="Video transcribed", elem_id="video_transcribed", interactive=False, visible=True)
|
387 |
+
)
|
388 |
+
|
389 |
+
def concatenate_transcriptions():
|
390 |
+
folder_concatenated = "concatenated_transcriptions"
|
391 |
+
if not os.path.exists(folder_concatenated):
|
392 |
+
os.makedirs(folder_concatenated)
|
393 |
+
chunck_file = "chunks/output_files.txt"
|
394 |
+
speakers_file = "vocals/speakers.txt"
|
395 |
python_file = "concat_transcriptions.py"
|
396 |
command = f"python {python_file} {chunck_file} {SECONDS} {speakers_file}"
|
397 |
os.system(command)
|
|
|
410 |
result = f.read()
|
411 |
|
412 |
return (
|
413 |
+
gr.Textbox(value="Ok", label="Transcription translated", elem_id="transcription_translated", interactive=False, visible=True),
|
414 |
+
gr.Textbox(value=audio_transcribed, label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False),
|
415 |
)
|
416 |
|
417 |
def translate_transcription(original_audio_transcribed_path, source_languaje, target_languaje):
|
|
|
430 |
os.system(command)
|
431 |
|
432 |
return (
|
433 |
+
gr.Textbox(value="Ok", label="Video subtitled", elem_id="video_subtitled", interactive=False, visible=True),
|
434 |
gr.Textbox(value=translated_transcription, label="Original audio translated", elem_id="original_audio_translated", visible=False)
|
435 |
)
|
436 |
|
|
|
457 |
def subtify():
|
458 |
with gr.Blocks() as demo:
|
459 |
# Layout
|
460 |
+
num_speaker = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
|
461 |
gr.Markdown("""# Subtify""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
462 |
with gr.Row(variant="panel"):
|
463 |
url_textbox = gr.Textbox(placeholder="Add video URL here", label="Video URL", elem_id="video_url", scale=1, interactive=True)
|
464 |
+
# copy_button = gr.Button(size="sm", icon="icons/copy.svg", value="", min_width="10px", scale=0)
|
465 |
+
# delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="", min_width="10px", scale=0)
|
466 |
|
|
|
467 |
visible = False
|
468 |
with gr.Row(equal_height=False):
|
469 |
image = gr.Image(visible=visible, scale=1)
|
470 |
with gr.Column():
|
471 |
with gr.Row():
|
472 |
+
source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True, info="Language of the video")
|
473 |
+
target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True, info="Language to translate the subtitles")
|
474 |
+
number_of_speakers = gr.Dropdown(visible=visible, label="Number of speakers", show_label=True, value=10, choices=num_speaker, scale=1, interactive=True, info="Number of speakers in the video, if you don't know, select 10")
|
475 |
with gr.Row():
|
476 |
subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=visible)
|
477 |
|
478 |
+
with gr.Row():
|
479 |
+
video_donwloaded = gr.Textbox(placeholder="Waiting", label="Video downloaded", elem_id="video_downloaded", interactive=False, visible=True)
|
480 |
+
video_sliced = gr.Textbox(placeholder="Waiting", label="Video sliced", elem_id="video_sliced", interactive=False, visible=True)
|
481 |
+
video_transcribed = gr.Textbox(placeholder="Waiting", label="Video transcribed", elem_id="video_transcribed", interactive=False, visible=True)
|
482 |
+
video_translated = gr.Textbox(placeholder="Waiting", label="Transcription translated", elem_id="transcription_translated", interactive=False, visible=True)
|
483 |
+
video_subtitled = gr.Textbox(placeholder="Waiting", label="Video subtitled", elem_id="video_subtitled", interactive=False, visible=True)
|
484 |
+
|
485 |
original_audio_path = gr.Textbox(label="Original audio path", elem_id="original_audio_path", visible=False)
|
486 |
original_video_path = gr.Textbox(label="Original video path", elem_id="original_video_path", visible=False)
|
|
|
487 |
original_audio_transcribed_path = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False)
|
|
|
488 |
original_audio_translated_path = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", visible=False)
|
489 |
subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=False)
|
490 |
|
491 |
# Events
|
492 |
# copy_button.click(fn=copy_url_from_clipboard, outputs=url_textbox)
|
493 |
+
# delete_button.click(
|
494 |
+
# fn=clear_video_url,
|
495 |
+
# outputs=[
|
496 |
+
# url_textbox,
|
497 |
+
# image,
|
498 |
+
# source_languaje,
|
499 |
+
# target_languaje,
|
500 |
+
# number_of_speakers,
|
501 |
+
# subtify_button,
|
502 |
+
# original_audio_transcribed,
|
503 |
+
# original_audio_translated,
|
504 |
+
# ]
|
505 |
+
# )
|
506 |
url_textbox.change(
|
507 |
fn=is_valid_url,
|
508 |
inputs=url_textbox,
|
|
|
510 |
image,
|
511 |
source_languaje,
|
512 |
target_languaje,
|
513 |
+
number_of_speakers,
|
514 |
subtify_button,
|
515 |
+
subtitled_video,
|
|
|
|
|
|
|
|
|
516 |
]
|
517 |
)
|
518 |
+
subtify_button.click(fn=get_audio_and_video_from_video, inputs=[url_textbox], outputs=[video_donwloaded, original_audio_path, original_video_path])
|
519 |
+
video_donwloaded.change(fn=slice_audio, inputs=[original_audio_path], outputs=[video_sliced])
|
520 |
+
video_sliced.change(fn=trascribe_audio, inputs=[source_languaje], outputs=[video_transcribed])
|
521 |
+
video_transcribed.change(fn=concatenate_transcriptions, inputs=[], outputs=[video_translated, original_audio_transcribed_path])
|
522 |
+
video_translated.change(fn=translate_transcription, inputs=[original_audio_transcribed_path, source_languaje, target_languaje], outputs=[video_subtitled, original_audio_translated_path])
|
523 |
+
video_subtitled.change(fn=add_translated_subtitles_to_video, inputs=[original_video_path, original_audio_path, original_audio_translated_path], outputs=subtitled_video)
|
524 |
|
525 |
demo.launch()
|
526 |
|
requirements.txt
CHANGED
@@ -9,18 +9,7 @@ pytube
|
|
9 |
yt-dlp
|
10 |
twitch-dl
|
11 |
|
12 |
-
# # mossformer --> separate speech audios
|
13 |
-
# torch
|
14 |
-
# torchvision
|
15 |
-
# torchaudio
|
16 |
-
# speechbrain
|
17 |
-
# soundfile
|
18 |
-
# modelscope
|
19 |
-
# rotary-embedding-torch
|
20 |
-
# transformers
|
21 |
-
|
22 |
# Transcribe audios
|
23 |
-
# git+https://github.com/openai/whisper.git
|
24 |
git+https://github.com/m-bain/whisperx.git
|
25 |
pyannote.audio
|
26 |
|
|
|
9 |
yt-dlp
|
10 |
twitch-dl
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
# Transcribe audios
|
|
|
13 |
git+https://github.com/m-bain/whisperx.git
|
14 |
pyannote.audio
|
15 |
|
transcribe.py
CHANGED
@@ -28,7 +28,11 @@ def transcribe(audio_file, language, device, vocals):
|
|
28 |
model = "large-v2"
|
29 |
# word_timestamps = True
|
30 |
print_progress = False
|
31 |
-
|
|
|
|
|
|
|
|
|
32 |
fp16 = True
|
33 |
batch_size = 8
|
34 |
verbose = False
|
|
|
28 |
model = "large-v2"
|
29 |
# word_timestamps = True
|
30 |
print_progress = False
|
31 |
+
if device == "cpu":
|
32 |
+
# I suppose that I am on the Hugging Face server
|
33 |
+
compute_type = "float32"
|
34 |
+
else:
|
35 |
+
compute_type = "float16"
|
36 |
fp16 = True
|
37 |
batch_size = 8
|
38 |
verbose = False
|