import gradio as gr import argparse # import os # import torch # from time import sleep # from tqdm import tqdm # from lang_list import union_language_dict # # import pyperclip # from pytube import YouTube # import re # NUMBER = 100 # DEVICE = "cuda" if torch.cuda.is_available() else "cpu" # # DEVICE = "cpu" # DOWNLOAD = True # SLICE_AUDIO = False # SEPARE_VOCALS = False # TRANSCRIBE_AUDIO = False # CONCATENATE_TRANSCRIPTIONS = False # TRANSLATE_TRANSCRIPTIONS = False # ADD_SUBTITLES_TO_VIDEO = False # REMOVE_FILES = False # REMOVE_ALL = False # if SEPARE_VOCALS: # SECONDS = 150 # else: # SECONDS = 300 # YOUTUBE = "youtube" # TWITCH = "twitch" # ERROR = "error" # language_dict = union_language_dict() # def subtify_no_ui(): # number_works = 7 # progress_bar = tqdm(total=number_works, desc="Subtify") # ################## Download video and audio ################## # if DOWNLOAD: # print('*'*NUMBER) # # url = "https://www.twitch.tv/videos/1936119752" # twitch Rob Mula 2 horas # # url = "https://www.youtube.com/watch?v=yX5EJf4R77s" # ✅ debate, varios hablantes, 3 minutos # # url = "https://www.youtube.com/watch?v=cgx0QnXo1OU" # ✅ smart home, un solo hablante, 4:42 minutos # url = "https://www.youtube.com/watch?v=dgOBxhi19T8" # ✅ rob mula, muchos hablantes, 4:28 minutos # # url = "https://www.youtube.com/watch?v=Coj72EzmX20" # rob mula, un solo hablante, 16 minutos # # url = "https://www.youtube.com/watch?v=Tqth0fKo0_g" # Conversación short # print(f"Downloading video and audio from {url}") # python_file = "download.py" # command = f"python {python_file} {url}" # os.system(command) # sleep(5) # print('*'*NUMBER) # print("\n\n") # progress_bar.update(1) # ################## Slice audio ################## # if SLICE_AUDIO: # print('*'*NUMBER) # print("Slicing audio") # python_file = "slice_audio.py" # audio = "audios/download_audio.mp3" # command = f"python {python_file} {audio} {SECONDS}" # os.system(command) # print('*'*NUMBER) # print("\n\n") # progress_bar.update(1) # 
################## Get vocals ################## # chunck_file = "chunks/output_files.txt" # print('*'*NUMBER) # if SEPARE_VOCALS: # print("Get vocals") # python_file = "separe_vocals.py" # command = f"python {python_file} {chunck_file} {DEVICE}" # os.system(command) # if REMOVE_FILES: # with open(chunck_file, 'r') as f: # files = f.read().splitlines() # for file in files: # command = f"rm {file}" # os.system(command) # else: # print("Moving chunks") # folder_vocals = "vocals" # folder_chunck = "chunks" # with open(f"{folder_vocals}/speakers.txt", 'w') as f: # f.write(str(0)) # if REMOVE_FILES: # command = f"mv {folder_chunck}/*.mp3 {folder_vocals}/" # os.system(command) # else: # command = f"cp {folder_chunck}/*.mp3 {folder_vocals}/" # os.system(command) # print('*'*NUMBER) # print("\n\n") # progress_bar.update(1) # ################# Transcript vocals ################## # speakers_file = "vocals/speakers.txt" # if TRANSCRIBE_AUDIO: # print('*'*NUMBER) # print("Transcript vocals") # python_file = "transcribe.py" # language = "English" # command = f"python {python_file} {chunck_file} {language} {speakers_file} {DEVICE} {not SEPARE_VOCALS}" # os.system(command) # if REMOVE_FILES: # vocals_folder = "vocals" # with open(chunck_file, 'r') as f: # files = f.read().splitlines() # with open(speakers_file, 'r') as f: # speakers = f.read().splitlines() # speakers = int(speakers[0]) # for file in files: # if speakers > 0: # vocals_extension = "wav" # for i in range(speakers): # file_name, _ = file.split(".") # _, file_name = file_name.split("/") # vocal = f'{vocals_folder}/{file_name}_speaker{i:003d}.{vocals_extension}' # command = f"rm {vocal}" # os.system(command) # else: # vocals_extension = "mp3" # file_name, _ = file.split(".") # _, file_name = file_name.split("/") # vocal = f'{vocals_folder}/{file_name}.{vocals_extension}' # command = f"rm {vocal}" # os.system(command) # print('*'*NUMBER) # print("\n\n") # progress_bar.update(1) # ################## Concatenate 
transcriptions ################## # if CONCATENATE_TRANSCRIPTIONS: # print('*'*NUMBER) # print("Concatenate transcriptions") # python_file = "concat_transcriptions.py" # command = f"python {python_file} {chunck_file} {SECONDS} {speakers_file}" # os.system(command) # if REMOVE_FILES: # with open(chunck_file, 'r') as f: # files = f.read().splitlines() # for file in files: # file_name, _ = file.split(".") # _, file_name = file_name.split("/") # transcriptions_folder = "transcriptions" # transcription_extension = "srt" # command = f"rm {transcriptions_folder}/{file_name}.{transcription_extension}" # os.system(command) # print('*'*NUMBER) # print("\n\n") # progress_bar.update(1) # ################## Translate transcription ################## # target_languaje = "Español" # if TRANSLATE_TRANSCRIPTIONS: # print('*'*NUMBER) # print("Translate transcription") # transcription_file = "concatenated_transcriptions/download_audio.srt" # source_languaje = "English" # python_file = "translate_transcriptions.py" # command = f"python {python_file} {transcription_file} --source_languaje {source_languaje} --target_languaje {target_languaje} --device {DEVICE}" # os.system(command) # if REMOVE_FILES: # command = f"rm {transcription_file}" # os.system(command) # print('*'*NUMBER) # print("\n\n") # progress_bar.update(1) # ################## Add subtitles to video ################## # if ADD_SUBTITLES_TO_VIDEO: # print('*'*NUMBER) # print("Add subtitles to video") # python_file = "add_subtitles_to_video.py" # transcription_file = f"translated_transcriptions/download_audio_{target_languaje}.srt" # input_video_file = "videos/download_video.mp4" # input_audio_file = "audios/download_audio.mp3" # command = f"python {python_file} {transcription_file} {input_video_file} {input_audio_file}" # os.system(command) # if REMOVE_FILES: # command = f"rm {input_video_file}" # os.system(command) # command = f"rm {input_audio_file}" # os.system(command) # command = f"rm {transcription_file}" # 
os.system(command) # command = f"rm chunks/output_files.txt" # os.system(command) # command = f"rm vocals/speakers.txt" # os.system(command) # print('*'*NUMBER) # print("\n\n") # progress_bar.update(1) # ################## Remove all ################## # if REMOVE_ALL: # command = f"rm audios/*" # os.system(command) # command = f"rm chunks/*" # os.system(command) # command = f"rm concatenated_transcriptions/*" # os.system(command) # command = f"rm transcriptions/*" # os.system(command) # command = f"rm translated_transcriptions/*" # os.system(command) # # Check if videos/download_video.mp4 exists # if os.path.isfile("videos/download_video.mp4"): # command = f"rm videos/download_video.mp4" # os.system(command) # # command = f"rm videos/*" # # os.system(command) # command = f"rm vocals/*" # os.system(command) # # def copy_url_from_clipboard(): # # return pyperclip.paste() # def clear_video_url(): # visible = False # image = gr.Image(visible=visible, scale=1) # source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True) # target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True) # translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=visible) # original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False) # original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible) # original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible) # return ( # "", # image, # source_languaje, # target_languaje, # translate_button, # original_audio, # original_audio_transcribed, # original_audio_translated, # ) # def 
get_youtube_thumbnail(url): # yt = YouTube(url) # thumbnail_url = yt.thumbnail_url # return thumbnail_url # def is_valid_youtube_url(url): # patron_youtube = r'(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/)[\w-]+' # if not re.match(patron_youtube, url): # return False # return True # def is_valid_url(url): # source_languaje = gr.Dropdown(visible=True, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True) # target_languaje = gr.Dropdown(visible=True, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True) # translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=True) # original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=True, interactive=False) # original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=True) # original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=True) # subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=True, interactive=False) # # Youtube # if "youtube" in url.lower() or "youtu.be" in url.lower(): # if is_valid_youtube_url(url): # thumbnail = get_youtube_thumbnail(url) # if thumbnail: # return ( # gr.Image(value=thumbnail, visible=True, show_download_button=False, container=False), # source_languaje, # target_languaje, # translate_button, # gr.Textbox(value=YOUTUBE, label="Stream page", elem_id="stream_page", visible=False), # original_audio, # original_audio_transcribed, # original_audio_translated, # subtitled_video # ) # else: # return ( # gr.Image(value="assets/youtube-no-thumbnails.webp", visible=True, show_download_button=False, container=False), # source_languaje, # target_languaje, # translate_button, # gr.Textbox(value=YOUTUBE, label="Stream page", 
elem_id="stream_page", visible=False), # original_audio, # original_audio_transcribed, # original_audio_translated, # subtitled_video # ) # # Twitch # elif "twitch" in url.lower() or "twitch.tv" in url.lower(): # return ( # gr.Image(value="assets/twitch.webp", visible=True, show_download_button=False, container=False), # source_languaje, # target_languaje, # translate_button, # gr.Textbox(value=TWITCH, label="Stream page", elem_id="stream_page", visible=False), # original_audio, # original_audio_transcribed, # original_audio_translated, # subtitled_video # ) # # Error # visible = False # image = gr.Image(value="assets/youtube_error.webp", visible=visible, show_download_button=False, container=False) # source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True) # target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True) # translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=visible) # stream_page = gr.Textbox(value=ERROR, label="Stream page", elem_id="stream_page", visible=visible) # original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False) # original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible) # original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible) # subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=False) # return ( # image, # source_languaje, # target_languaje, # translate_button, # stream_page, # original_audio, # original_audio_transcribed, # original_audio_translated, # subtitled_video # ) # def get_audio_and_video_from_video(url, 
stream_page): # python_file = "download.py" # command = f"python {python_file} {url}" # os.system(command) # # sleep(5) # audio = "audios/download_audio.mp3" # video = "videos/download_video.mp4" # return ( # gr.Audio(value=audio, label="Original audio", elem_id="original_audio", visible=True, interactive=False), # gr.Textbox(value=audio, label="Original audio path", elem_id="original_audio_path", visible=False), # gr.Textbox(value=video, label="Original video path", elem_id="original_video_path", visible=False) # ) # def trascribe_audio(audio_path, source_languaje): # python_file = "slice_audio.py" # command = f"python {python_file} {audio_path} {SECONDS}" # os.system(command) # folder_vocals = "vocals" # folder_chunck = "chunks" # with open(f"{folder_vocals}/speakers.txt", 'w') as f: # f.write(str(0)) # command = f"mv {folder_chunck}/*.mp3 {folder_vocals}/" # os.system(command) # python_file = "transcribe.py" # chunck_file = "chunks/output_files.txt" # speakers_file = "vocals/speakers.txt" # command = f"python {python_file} {chunck_file} {source_languaje} {speakers_file} {DEVICE} {not SEPARE_VOCALS}" # os.system(command) # with open(chunck_file, 'r') as f: # files = f.read().splitlines() # with open(speakers_file, 'r') as f: # speakers = f.read().splitlines() # speakers = int(speakers[0]) # for file in files: # if speakers > 0: # vocals_extension = "wav" # for i in range(speakers): # file_name, _ = file.split(".") # _, file_name = file_name.split("/") # vocal = f'{folder_vocals}/{file_name}_speaker{i:003d}.{vocals_extension}' # command = f"rm {vocal}" # os.system(command) # else: # vocals_extension = "mp3" # file_name, _ = file.split(".") # _, file_name = file_name.split("/") # vocal = f'{folder_vocals}/{file_name}.{vocals_extension}' # command = f"rm {vocal}" # os.system(command) # python_file = "concat_transcriptions.py" # command = f"python {python_file} {chunck_file} {SECONDS} {speakers_file}" # os.system(command) # with open(chunck_file, 'r') as f: # files = 
f.read().splitlines() # for file in files: # file_name, _ = file.split(".") # _, file_name = file_name.split("/") # transcriptions_folder = "transcriptions" # transcription_extension = "srt" # command = f"rm {transcriptions_folder}/{file_name}.{transcription_extension}" # os.system(command) # audio_transcribed = "concatenated_transcriptions/download_audio.srt" # with open(audio_transcribed, 'r') as f: # result = f.read() # return ( # result, # gr.Textbox(value=audio_transcribed, label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False) # ) # def translate_transcription(original_audio_transcribed_path, source_languaje, target_languaje): # python_file = "translate_transcriptions.py" # command = f"python {python_file} {original_audio_transcribed_path} --source_languaje {source_languaje} --target_languaje {target_languaje} --device {DEVICE}" # os.system(command) # translated_transcription = f"translated_transcriptions/download_audio_{target_languaje}.srt" # with open(translated_transcription, 'r') as f: # result = f.read() # transcription_file = "concatenated_transcriptions/download_audio.srt" # command = f"rm {transcription_file}" # os.system(command) # return ( # result, # gr.Textbox(value=translated_transcription, label="Original audio translated", elem_id="original_audio_translated", visible=False) # ) # def add_translated_subtitles_to_video(original_video_path, original_audio_path, original_audio_translated_path): # python_file = "add_subtitles_to_video.py" # command = f"python {python_file} {original_audio_translated_path} {original_video_path} {original_audio_path}" # os.system(command) # command = f"rm {original_video_path}" # os.system(command) # command = f"rm {original_audio_path}" # os.system(command) # command = f"rm {original_audio_translated_path}" # os.system(command) # command = f"rm chunks/output_files.txt" # os.system(command) # command = f"rm vocals/speakers.txt" # os.system(command) # subtitled_video = 
# (tail of the disabled add_translated_subtitles_to_video helper, kept for reference)
#     "videos/download_video_with_subtitles.mp4"
#     return gr.Video(value=subtitled_video, label="Subtitled video", elem_id="subtitled_video", visible=True, interactive=False)


def subtify():
    """Build the Subtify Gradio page and launch it.

    Only the page title is rendered at the moment. The rest of the layout
    and its event wiring are disabled and kept below as commented-out
    reference code (its indentation was reconstructed and should be
    double-checked before re-enabling).
    """
    with gr.Blocks() as demo:
        # Layout
        # The title used to be emitted by two identical consecutive
        # gr.Markdown calls, which rendered the heading twice; emit it once.
        gr.Markdown("""# Subtify""")

        # with gr.Row(variant="panel"):
        #     url_textbox = gr.Textbox(placeholder="Add video URL here", label="Video URL", elem_id="video_url", scale=1, interactive=True)
        #     copy_button = gr.Button(size="sm", icon="icons/copy.svg", value="", min_width="10px", scale=0)
        #     delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="", min_width="10px", scale=0)

        # stream_page = gr.Textbox(label="Stream page", elem_id="stream_page", visible=False)
        # visible = False
        # with gr.Row(equal_height=False):
        #     image = gr.Image(visible=visible, scale=1)
        #     with gr.Column():
        #         with gr.Row():
        #             source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
        #             target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
        #         with gr.Row():
        #             subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=visible)

        # original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False)
        # original_audio_path = gr.Textbox(label="Original audio path", elem_id="original_audio_path", visible=False)
        # original_video_path = gr.Textbox(label="Original video path", elem_id="original_video_path", visible=False)
        # original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
        # original_audio_transcribed_path = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False)
        # original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
        # original_audio_translated_path = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", visible=False)
        # subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=False)

        # # Events
        # # copy_button.click(fn=copy_url_from_clipboard, outputs=url_textbox)
        # delete_button.click(
        #     fn=clear_video_url,
        #     outputs=[
        #         url_textbox,
        #         image,
        #         source_languaje,
        #         target_languaje,
        #         subtify_button,
        #         original_audio,
        #         original_audio_transcribed,
        #         original_audio_translated,
        #     ]
        # )
        # url_textbox.change(
        #     fn=is_valid_url,
        #     inputs=url_textbox,
        #     outputs=[
        #         image,
        #         source_languaje,
        #         target_languaje,
        #         subtify_button,
        #         stream_page,
        #         original_audio,
        #         original_audio_transcribed,
        #         original_audio_translated,
        #         subtitled_video
        #     ]
        # )
        # subtify_button.click(fn=get_audio_and_video_from_video, inputs=[url_textbox, stream_page], outputs=[original_audio, original_audio_path, original_video_path])
        # original_audio.change(fn=trascribe_audio, inputs=[original_audio_path, source_languaje], outputs=[original_audio_transcribed, original_audio_transcribed_path])
        # original_audio_transcribed.change(fn=translate_transcription, inputs=[original_audio_transcribed_path, source_languaje, target_languaje], outputs=[original_audio_translated, original_audio_translated_path])
        # original_audio_translated.change(fn=add_translated_subtitles_to_video, inputs=[original_video_path, original_audio_path, original_audio_translated_path], outputs=subtitled_video)

    demo.launch()


def main(argv=None):
    """Parse the command line and start Subtify in the requested mode.

    Args:
        argv: Argument strings to parse. ``None`` lets argparse read
            ``sys.argv[1:]``, which is what the script entry point uses.

    Returns:
        The process exit status, always ``0``.
    """
    import sys  # local import: only needed for the stderr notice below

    parser = argparse.ArgumentParser()
    parser.add_argument("--no_ui", action="store_true")
    args = parser.parse_args(argv)

    if args.no_ui:
        # The headless pipeline (subtify_no_ui) is commented out in this
        # file, so the flag has nothing to run. Say so instead of exiting
        # silently; the exit status stays 0 as before.
        print(
            "--no_ui: the headless pipeline (subtify_no_ui) is currently "
            "disabled; nothing to do.",
            file=sys.stderr,
        )
        return 0

    subtify()
    return 0


if __name__ == "__main__":
    raise SystemExit(main())