import gradio as gr | |
import argparse | |
import sys | |
# import os | |
# import torch | |
# from time import sleep | |
# from tqdm import tqdm | |
# from lang_list import union_language_dict | |
# # import pyperclip | |
# from pytube import YouTube | |
# import re | |
# NUMBER = 100 | |
# DEVICE = "cuda" if torch.cuda.is_available() else "cpu" | |
# # DEVICE = "cpu" | |
# DOWNLOAD = True | |
# SLICE_AUDIO = False | |
# SEPARE_VOCALS = False | |
# TRANSCRIBE_AUDIO = False | |
# CONCATENATE_TRANSCRIPTIONS = False | |
# TRANSLATE_TRANSCRIPTIONS = False | |
# ADD_SUBTITLES_TO_VIDEO = False | |
# REMOVE_FILES = False | |
# REMOVE_ALL = False | |
# if SEPARE_VOCALS: | |
# SECONDS = 150 | |
# else: | |
# SECONDS = 300 | |
# YOUTUBE = "youtube" | |
# TWITCH = "twitch" | |
# ERROR = "error" | |
# language_dict = union_language_dict() | |
# def subtify_no_ui(): | |
# number_works = 7 | |
# progress_bar = tqdm(total=number_works, desc="Subtify") | |
# ################## Download video and audio ################## | |
# if DOWNLOAD: | |
# print('*'*NUMBER) | |
# # url = "https://www.twitch.tv/videos/1936119752" # twitch Rob Mula 2 horas | |
# # url = "https://www.youtube.com/watch?v=yX5EJf4R77s" # ✅ debate, varios hablantes, 3 minutos | |
# # url = "https://www.youtube.com/watch?v=cgx0QnXo1OU" # ✅ smart home, un solo hablante, 4:42 minutos | |
# url = "https://www.youtube.com/watch?v=dgOBxhi19T8" # ✅ rob mula, muchos hablantes, 4:28 minutos | |
# # url = "https://www.youtube.com/watch?v=Coj72EzmX20" # rob mula, un solo hablante, 16 minutos | |
# # url = "https://www.youtube.com/watch?v=Tqth0fKo0_g" # Conversación short | |
# print(f"Downloading video and audio from {url}") | |
# python_file = "download.py" | |
# command = f"python {python_file} {url}" | |
# os.system(command) | |
# sleep(5) | |
# print('*'*NUMBER) | |
# print("\n\n") | |
# progress_bar.update(1) | |
# ################## Slice audio ################## | |
# if SLICE_AUDIO: | |
# print('*'*NUMBER) | |
# print("Slicing audio") | |
# python_file = "slice_audio.py" | |
# audio = "audios/download_audio.mp3" | |
# command = f"python {python_file} {audio} {SECONDS}" | |
# os.system(command) | |
# print('*'*NUMBER) | |
# print("\n\n") | |
# progress_bar.update(1) | |
# ################## Get vocals ################## | |
# chunck_file = "chunks/output_files.txt" | |
# print('*'*NUMBER) | |
# if SEPARE_VOCALS: | |
# print("Get vocals") | |
# python_file = "separe_vocals.py" | |
# command = f"python {python_file} {chunck_file} {DEVICE}" | |
# os.system(command) | |
# if REMOVE_FILES: | |
# with open(chunck_file, 'r') as f: | |
# files = f.read().splitlines() | |
# for file in files: | |
# command = f"rm {file}" | |
# os.system(command) | |
# else: | |
# print("Moving chunks") | |
# folder_vocals = "vocals" | |
# folder_chunck = "chunks" | |
# with open(f"{folder_vocals}/speakers.txt", 'w') as f: | |
# f.write(str(0)) | |
# if REMOVE_FILES: | |
# command = f"mv {folder_chunck}/*.mp3 {folder_vocals}/" | |
# os.system(command) | |
# else: | |
# command = f"cp {folder_chunck}/*.mp3 {folder_vocals}/" | |
# os.system(command) | |
# print('*'*NUMBER) | |
# print("\n\n") | |
# progress_bar.update(1) | |
# ################# Transcript vocals ################## | |
# speakers_file = "vocals/speakers.txt" | |
# if TRANSCRIBE_AUDIO: | |
# print('*'*NUMBER) | |
# print("Transcript vocals") | |
# python_file = "transcribe.py" | |
# language = "English" | |
# command = f"python {python_file} {chunck_file} {language} {speakers_file} {DEVICE} {not SEPARE_VOCALS}" | |
# os.system(command) | |
# if REMOVE_FILES: | |
# vocals_folder = "vocals" | |
# with open(chunck_file, 'r') as f: | |
# files = f.read().splitlines() | |
# with open(speakers_file, 'r') as f: | |
# speakers = f.read().splitlines() | |
# speakers = int(speakers[0]) | |
# for file in files: | |
# if speakers > 0: | |
# vocals_extension = "wav" | |
# for i in range(speakers): | |
# file_name, _ = file.split(".") | |
# _, file_name = file_name.split("/") | |
# vocal = f'{vocals_folder}/{file_name}_speaker{i:003d}.{vocals_extension}' | |
# command = f"rm {vocal}" | |
# os.system(command) | |
# else: | |
# vocals_extension = "mp3" | |
# file_name, _ = file.split(".") | |
# _, file_name = file_name.split("/") | |
# vocal = f'{vocals_folder}/{file_name}.{vocals_extension}' | |
# command = f"rm {vocal}" | |
# os.system(command) | |
# print('*'*NUMBER) | |
# print("\n\n") | |
# progress_bar.update(1) | |
# ################## Concatenate transcriptions ################## | |
# if CONCATENATE_TRANSCRIPTIONS: | |
# print('*'*NUMBER) | |
# print("Concatenate transcriptions") | |
# python_file = "concat_transcriptions.py" | |
# command = f"python {python_file} {chunck_file} {SECONDS} {speakers_file}" | |
# os.system(command) | |
# if REMOVE_FILES: | |
# with open(chunck_file, 'r') as f: | |
# files = f.read().splitlines() | |
# for file in files: | |
# file_name, _ = file.split(".") | |
# _, file_name = file_name.split("/") | |
# transcriptions_folder = "transcriptions" | |
# transcription_extension = "srt" | |
# command = f"rm {transcriptions_folder}/{file_name}.{transcription_extension}" | |
# os.system(command) | |
# print('*'*NUMBER) | |
# print("\n\n") | |
# progress_bar.update(1) | |
# ################## Translate transcription ################## | |
# target_languaje = "Español" | |
# if TRANSLATE_TRANSCRIPTIONS: | |
# print('*'*NUMBER) | |
# print("Translate transcription") | |
# transcription_file = "concatenated_transcriptions/download_audio.srt" | |
# source_languaje = "English" | |
# python_file = "translate_transcriptions.py" | |
# command = f"python {python_file} {transcription_file} --source_languaje {source_languaje} --target_languaje {target_languaje} --device {DEVICE}" | |
# os.system(command) | |
# if REMOVE_FILES: | |
# command = f"rm {transcription_file}" | |
# os.system(command) | |
# print('*'*NUMBER) | |
# print("\n\n") | |
# progress_bar.update(1) | |
# ################## Add subtitles to video ################## | |
# if ADD_SUBTITLES_TO_VIDEO: | |
# print('*'*NUMBER) | |
# print("Add subtitles to video") | |
# python_file = "add_subtitles_to_video.py" | |
# transcription_file = f"translated_transcriptions/download_audio_{target_languaje}.srt" | |
# input_video_file = "videos/download_video.mp4" | |
# input_audio_file = "audios/download_audio.mp3" | |
# command = f"python {python_file} {transcription_file} {input_video_file} {input_audio_file}" | |
# os.system(command) | |
# if REMOVE_FILES: | |
# command = f"rm {input_video_file}" | |
# os.system(command) | |
# command = f"rm {input_audio_file}" | |
# os.system(command) | |
# command = f"rm {transcription_file}" | |
# os.system(command) | |
# command = f"rm chunks/output_files.txt" | |
# os.system(command) | |
# command = f"rm vocals/speakers.txt" | |
# os.system(command) | |
# print('*'*NUMBER) | |
# print("\n\n") | |
# progress_bar.update(1) | |
# ################## Remove all ################## | |
# if REMOVE_ALL: | |
# command = f"rm audios/*" | |
# os.system(command) | |
# command = f"rm chunks/*" | |
# os.system(command) | |
# command = f"rm concatenated_transcriptions/*" | |
# os.system(command) | |
# command = f"rm transcriptions/*" | |
# os.system(command) | |
# command = f"rm translated_transcriptions/*" | |
# os.system(command) | |
# # Check if videos/download_video.mp4 exists | |
# if os.path.isfile("videos/download_video.mp4"): | |
# command = f"rm videos/download_video.mp4" | |
# os.system(command) | |
# # command = f"rm videos/*" | |
# # os.system(command) | |
# command = f"rm vocals/*" | |
# os.system(command) | |
# # def copy_url_from_clipboard(): | |
# # return pyperclip.paste() | |
# def clear_video_url(): | |
# visible = False | |
# image = gr.Image(visible=visible, scale=1) | |
# source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True) | |
# target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True) | |
# translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=visible) | |
# original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False) | |
# original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible) | |
# original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible) | |
# return ( | |
# "", | |
# image, | |
# source_languaje, | |
# target_languaje, | |
# translate_button, | |
# original_audio, | |
# original_audio_transcribed, | |
# original_audio_translated, | |
# ) | |
# def get_youtube_thumbnail(url): | |
# yt = YouTube(url) | |
# thumbnail_url = yt.thumbnail_url | |
# return thumbnail_url | |
# def is_valid_youtube_url(url): | |
# patron_youtube = r'(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/)[\w-]+' | |
# if not re.match(patron_youtube, url): | |
# return False | |
# return True | |
# def is_valid_url(url): | |
# source_languaje = gr.Dropdown(visible=True, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True) | |
# target_languaje = gr.Dropdown(visible=True, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True) | |
# translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=True) | |
# original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=True, interactive=False) | |
# original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=True) | |
# original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=True) | |
# subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=True, interactive=False) | |
# # Youtube | |
# if "youtube" in url.lower() or "youtu.be" in url.lower(): | |
# if is_valid_youtube_url(url): | |
# thumbnail = get_youtube_thumbnail(url) | |
# if thumbnail: | |
# return ( | |
# gr.Image(value=thumbnail, visible=True, show_download_button=False, container=False), | |
# source_languaje, | |
# target_languaje, | |
# translate_button, | |
# gr.Textbox(value=YOUTUBE, label="Stream page", elem_id="stream_page", visible=False), | |
# original_audio, | |
# original_audio_transcribed, | |
# original_audio_translated, | |
# subtitled_video | |
# ) | |
# else: | |
# return ( | |
# gr.Image(value="assets/youtube-no-thumbnails.webp", visible=True, show_download_button=False, container=False), | |
# source_languaje, | |
# target_languaje, | |
# translate_button, | |
# gr.Textbox(value=YOUTUBE, label="Stream page", elem_id="stream_page", visible=False), | |
# original_audio, | |
# original_audio_transcribed, | |
# original_audio_translated, | |
# subtitled_video | |
# ) | |
# # Twitch | |
# elif "twitch" in url.lower() or "twitch.tv" in url.lower(): | |
# return ( | |
# gr.Image(value="assets/twitch.webp", visible=True, show_download_button=False, container=False), | |
# source_languaje, | |
# target_languaje, | |
# translate_button, | |
# gr.Textbox(value=TWITCH, label="Stream page", elem_id="stream_page", visible=False), | |
# original_audio, | |
# original_audio_transcribed, | |
# original_audio_translated, | |
# subtitled_video | |
# ) | |
# # Error | |
# visible = False | |
# image = gr.Image(value="assets/youtube_error.webp", visible=visible, show_download_button=False, container=False) | |
# source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True) | |
# target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True) | |
# translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=visible) | |
# stream_page = gr.Textbox(value=ERROR, label="Stream page", elem_id="stream_page", visible=visible) | |
# original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False) | |
# original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible) | |
# original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible) | |
# subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=False) | |
# return ( | |
# image, | |
# source_languaje, | |
# target_languaje, | |
# translate_button, | |
# stream_page, | |
# original_audio, | |
# original_audio_transcribed, | |
# original_audio_translated, | |
# subtitled_video | |
# ) | |
# def get_audio_and_video_from_video(url, stream_page): | |
# python_file = "download.py" | |
# command = f"python {python_file} {url}" | |
# os.system(command) | |
# # sleep(5) | |
# audio = "audios/download_audio.mp3" | |
# video = "videos/download_video.mp4" | |
# return ( | |
# gr.Audio(value=audio, label="Original audio", elem_id="original_audio", visible=True, interactive=False), | |
# gr.Textbox(value=audio, label="Original audio path", elem_id="original_audio_path", visible=False), | |
# gr.Textbox(value=video, label="Original video path", elem_id="original_video_path", visible=False) | |
# ) | |
# def trascribe_audio(audio_path, source_languaje): | |
# python_file = "slice_audio.py" | |
# command = f"python {python_file} {audio_path} {SECONDS}" | |
# os.system(command) | |
# folder_vocals = "vocals" | |
# folder_chunck = "chunks" | |
# with open(f"{folder_vocals}/speakers.txt", 'w') as f: | |
# f.write(str(0)) | |
# command = f"mv {folder_chunck}/*.mp3 {folder_vocals}/" | |
# os.system(command) | |
# python_file = "transcribe.py" | |
# chunck_file = "chunks/output_files.txt" | |
# speakers_file = "vocals/speakers.txt" | |
# command = f"python {python_file} {chunck_file} {source_languaje} {speakers_file} {DEVICE} {not SEPARE_VOCALS}" | |
# os.system(command) | |
# with open(chunck_file, 'r') as f: | |
# files = f.read().splitlines() | |
# with open(speakers_file, 'r') as f: | |
# speakers = f.read().splitlines() | |
# speakers = int(speakers[0]) | |
# for file in files: | |
# if speakers > 0: | |
# vocals_extension = "wav" | |
# for i in range(speakers): | |
# file_name, _ = file.split(".") | |
# _, file_name = file_name.split("/") | |
# vocal = f'{folder_vocals}/{file_name}_speaker{i:003d}.{vocals_extension}' | |
# command = f"rm {vocal}" | |
# os.system(command) | |
# else: | |
# vocals_extension = "mp3" | |
# file_name, _ = file.split(".") | |
# _, file_name = file_name.split("/") | |
# vocal = f'{folder_vocals}/{file_name}.{vocals_extension}' | |
# command = f"rm {vocal}" | |
# os.system(command) | |
# python_file = "concat_transcriptions.py" | |
# command = f"python {python_file} {chunck_file} {SECONDS} {speakers_file}" | |
# os.system(command) | |
# with open(chunck_file, 'r') as f: | |
# files = f.read().splitlines() | |
# for file in files: | |
# file_name, _ = file.split(".") | |
# _, file_name = file_name.split("/") | |
# transcriptions_folder = "transcriptions" | |
# transcription_extension = "srt" | |
# command = f"rm {transcriptions_folder}/{file_name}.{transcription_extension}" | |
# os.system(command) | |
# audio_transcribed = "concatenated_transcriptions/download_audio.srt" | |
# with open(audio_transcribed, 'r') as f: | |
# result = f.read() | |
# return ( | |
# result, | |
# gr.Textbox(value=audio_transcribed, label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False) | |
# ) | |
# def translate_transcription(original_audio_transcribed_path, source_languaje, target_languaje): | |
# python_file = "translate_transcriptions.py" | |
# command = f"python {python_file} {original_audio_transcribed_path} --source_languaje {source_languaje} --target_languaje {target_languaje} --device {DEVICE}" | |
# os.system(command) | |
# translated_transcription = f"translated_transcriptions/download_audio_{target_languaje}.srt" | |
# with open(translated_transcription, 'r') as f: | |
# result = f.read() | |
# transcription_file = "concatenated_transcriptions/download_audio.srt" | |
# command = f"rm {transcription_file}" | |
# os.system(command) | |
# return ( | |
# result, | |
# gr.Textbox(value=translated_transcription, label="Original audio translated", elem_id="original_audio_translated", visible=False) | |
# ) | |
# def add_translated_subtitles_to_video(original_video_path, original_audio_path, original_audio_translated_path): | |
# python_file = "add_subtitles_to_video.py" | |
# command = f"python {python_file} {original_audio_translated_path} {original_video_path} {original_audio_path}" | |
# os.system(command) | |
# command = f"rm {original_video_path}" | |
# os.system(command) | |
# command = f"rm {original_audio_path}" | |
# os.system(command) | |
# command = f"rm {original_audio_translated_path}" | |
# os.system(command) | |
# command = f"rm chunks/output_files.txt" | |
# os.system(command) | |
# command = f"rm vocals/speakers.txt" | |
# os.system(command) | |
# subtitled_video = "videos/download_video_with_subtitles.mp4" | |
# return gr.Video(value=subtitled_video, label="Subtitled video", elem_id="subtitled_video", visible=True, interactive=False) | |
def subtify():
    """Build the Subtify Gradio page and launch it.

    The page currently contains only the application title and a line
    reporting the version of the running Python interpreter.  The complete
    subtitling layout and its event wiring are kept below as commented-out
    code; the helpers and constants they refer to (e.g. ``language_dict``,
    ``clear_video_url``, ``is_valid_url``) are also commented out at module
    level in this file, so that code cannot simply be re-enabled on its own.

    Returns:
        None.
    """
    with gr.Blocks() as demo:
        # Layout
        # Page title, rendered a single time.
        gr.Markdown("""# Subtify""")
        # Interpreter version, formatted as "Python <major>.<minor>.<micro>".
        gr.Markdown(f"Python {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}")

        # --- Disabled layout (kept for reference) ---
        # with gr.Row(variant="panel"):
        #     url_textbox = gr.Textbox(placeholder="Add video URL here", label="Video URL", elem_id="video_url", scale=1, interactive=True)
        #     copy_button = gr.Button(size="sm", icon="icons/copy.svg", value="", min_width="10px", scale=0)
        #     delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="", min_width="10px", scale=0)

        # stream_page = gr.Textbox(label="Stream page", elem_id="stream_page", visible=False)
        # visible = False
        # with gr.Row(equal_height=False):
        #     image = gr.Image(visible=visible, scale=1)
        #     with gr.Column():
        #         with gr.Row():
        #             source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
        #             target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
        #         with gr.Row():
        #             subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=visible)

        # original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False)
        # original_audio_path = gr.Textbox(label="Original audio path", elem_id="original_audio_path", visible=False)
        # original_video_path = gr.Textbox(label="Original video path", elem_id="original_video_path", visible=False)
        # original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
        # original_audio_transcribed_path = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False)
        # original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
        # original_audio_translated_path = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", visible=False)
        # subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=False)

        # --- Disabled events (kept for reference) ---
        # # copy_button.click(fn=copy_url_from_clipboard, outputs=url_textbox)
        # delete_button.click(
        #     fn=clear_video_url,
        #     outputs=[
        #         url_textbox,
        #         image,
        #         source_languaje,
        #         target_languaje,
        #         subtify_button,
        #         original_audio,
        #         original_audio_transcribed,
        #         original_audio_translated,
        #     ]
        # )
        # url_textbox.change(
        #     fn=is_valid_url,
        #     inputs=url_textbox,
        #     outputs=[
        #         image,
        #         source_languaje,
        #         target_languaje,
        #         subtify_button,
        #         stream_page,
        #         original_audio,
        #         original_audio_transcribed,
        #         original_audio_translated,
        #         subtitled_video
        #     ]
        # )
        # subtify_button.click(fn=get_audio_and_video_from_video, inputs=[url_textbox, stream_page], outputs=[original_audio, original_audio_path, original_video_path])
        # original_audio.change(fn=trascribe_audio, inputs=[original_audio_path, source_languaje], outputs=[original_audio_transcribed, original_audio_transcribed_path])
        # original_audio_transcribed.change(fn=translate_transcription, inputs=[original_audio_transcribed_path, source_languaje, target_languaje], outputs=[original_audio_translated, original_audio_translated_path])
        # original_audio_translated.change(fn=add_translated_subtitles_to_video, inputs=[original_video_path, original_audio_path, original_audio_translated_path], outputs=subtitled_video)

    # Launch the app once the Blocks context has been fully defined.
    demo.launch()
if __name__ == "__main__":
    # Command-line entry point: the only option is the --no_ui switch.
    parser = argparse.ArgumentParser()
    parser.add_argument("--no_ui", action="store_true")
    args = parser.parse_args()

    # With --no_ui nothing runs: the headless pipeline call
    # (subtify_no_ui()) is currently disabled.  Without the flag the
    # Gradio interface is started.
    if not args.no_ui:
        subtify()