subtify / app.py
Maximofn's picture
Rename variables and functions in app.py to make the code easier to read
1fbddba
raw
history blame
No virus
24.4 kB
import gradio as gr
import argparse
import os
import torch
from time import sleep
from tqdm import tqdm
from lang_list import union_language_dict
# import pyperclip
from pytube import YouTube
import re
# Width of the '*' separator lines printed between pipeline stages.
NUMBER = 100

# Run on the GPU when torch reports CUDA support, otherwise on the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Switches read by subtify_no_ui() (SEPARE_VOCALS is also read by trascribe_audio()).
DOWNLOAD = True
SLICE_AUDIO = True
SEPARE_VOCALS = False
TRANSCRIBE_AUDIO = True
CONCATENATE_TRANSCRIPTIONS = True
TRANSLATE_TRANSCRIPTIONS = True
ADD_SUBTITLES_TO_VIDEO = True
REMOVE_FILES = False

# Value handed to slice_audio.py and concat_transcriptions.py (presumably the
# chunk length in seconds -- confirm against those scripts). The CPU case
# (assumed to be the Hugging Face server) and the GPU case are kept as two
# separate values so they can be tuned independently; today they are equal.
SECONDS = 300 if DEVICE == "cpu" else 300

YOUTUBE = "youtube"
TWITCH = "twitch"
ERROR = "error"

# Language choices offered by the source/target dropdowns.
language_dict = union_language_dict()
def subtify_no_ui():
    """Run the whole subtitling pipeline from the command line, without the Gradio UI.

    Every stage launches a helper script through ``os.system`` and is gated by
    a module-level switch (DOWNLOAD, SLICE_AUDIO, SEPARE_VOCALS,
    TRANSCRIBE_AUDIO, CONCATENATE_TRANSCRIPTIONS, TRANSLATE_TRANSCRIPTIONS,
    ADD_SUBTITLES_TO_VIDEO). When REMOVE_FILES is set, the intermediate files
    of a stage are deleted with ``rm`` after the stage has run. The video URL
    and the source/target languages are hard-coded in the body.
    """
    # NOTE(review): the nesting of this function was reconstructed from a copy
    # of the file that had lost its indentation -- confirm against the repository.
    number_works = 7
    progress_bar = tqdm(total=number_works, desc="Subtify")  # one tick per stage
    folder_vocals = "vocals"
    folder_chunck = "chunks"
    folder_concatenated = "concatenated_transcriptions"
    folder_translated_transcriptions = "translated_transcriptions"
    # Make sure the working folders exist before any stage runs.
    if not os.path.exists(folder_vocals):
        os.makedirs(folder_vocals)
    if not os.path.exists(folder_chunck):
        os.makedirs(folder_chunck)
    if not os.path.exists(folder_concatenated):
        os.makedirs(folder_concatenated)
    if not os.path.exists(folder_translated_transcriptions):
        os.makedirs(folder_translated_transcriptions)
    ################## Download video and audio ##################
    if DOWNLOAD:
        print('*'*NUMBER)
        # url = "https://www.twitch.tv/videos/1936119752" # twitch, Rob Mula, 2 hours
        # url = "https://www.youtube.com/watch?v=yX5EJf4R77s" # ✅ debate, several speakers, 3 minutes
        # url = "https://www.youtube.com/watch?v=cgx0QnXo1OU" # ✅ smart home, single speaker, 4:42 minutes
        url = "https://www.youtube.com/watch?v=dgOBxhi19T8" # ✅ rob mula, many speakers, 4:28 minutes
        # url = "https://www.youtube.com/watch?v=Coj72EzmX20" # rob mula, single speaker, 16 minutes
        # url = "https://www.youtube.com/watch?v=Tqth0fKo0_g" # Short conversation
        print(f"Downloading video and audio from {url}")
        python_file = "download.py"
        command = f"python {python_file} {url}"
        os.system(command)
        sleep(5)
        print('*'*NUMBER)
        print("\n\n")
    progress_bar.update(1)
    ################## Slice audio ##################
    if SLICE_AUDIO:
        print('*'*NUMBER)
        print("Slicing audio")
        python_file = "slice_audio.py"
        audio = "audios/download_audio.mp3"
        command = f"python {python_file} {audio} {SECONDS}"
        os.system(command)
        print('*'*NUMBER)
        print("\n\n")
    progress_bar.update(1)
    ################## Get vocals ##################
    # Defined outside the stage switch because the later stages read it too.
    chunck_file = "chunks/output_files.txt"
    print('*'*NUMBER)
    if SEPARE_VOCALS:
        print("Get vocals")
        python_file = "separe_vocals.py"
        command = f"python {python_file} {chunck_file} {DEVICE}"
        os.system(command)
        if REMOVE_FILES:
            # Delete every chunk listed in chunck_file.
            with open(chunck_file, 'r') as f:
                files = f.read().splitlines()
            for file in files:
                command = f"rm {file}"
                os.system(command)
    else:
        # No vocal separation: record 0 speakers and put the raw chunks in the vocals folder.
        print("Moving chunks")
        with open(f"{folder_vocals}/speakers.txt", 'w') as f:
            f.write(str(0))
        if REMOVE_FILES:
            command = f"mv {folder_chunck}/*.mp3 {folder_vocals}/"
            os.system(command)
        else:
            command = f"cp {folder_chunck}/*.mp3 {folder_vocals}/"
            os.system(command)
    print('*'*NUMBER)
    print("\n\n")
    progress_bar.update(1)
    ################# Transcript vocals ##################
    # Defined outside the stage switch because the concatenation stage reads it too.
    speakers_file = "vocals/speakers.txt"
    if TRANSCRIBE_AUDIO:
        print('*'*NUMBER)
        print("Transcript vocals")
        python_file = "transcribe.py"
        language = "English"
        command = f"python {python_file} {chunck_file} {language} {speakers_file} {DEVICE} {not SEPARE_VOCALS}"
        os.system(command)
        if REMOVE_FILES:
            vocals_folder = "vocals"
            with open(chunck_file, 'r') as f:
                files = f.read().splitlines()
            with open(speakers_file, 'r') as f:
                speakers = f.read().splitlines()
                # The first line of the speakers file holds the speaker count.
                speakers = int(speakers[0])
            for file in files:
                if speakers > 0:
                    # One .wav per speaker and chunk.
                    vocals_extension = "wav"
                    for i in range(speakers):
                        # Expects exactly one "." and one "/" in each listed path.
                        file_name, _ = file.split(".")
                        _, file_name = file_name.split("/")
                        vocal = f'{vocals_folder}/{file_name}_speaker{i:003d}.{vocals_extension}'
                        command = f"rm {vocal}"
                        os.system(command)
                else:
                    # Speaker count 0: a single .mp3 per chunk.
                    vocals_extension = "mp3"
                    file_name, _ = file.split(".")
                    _, file_name = file_name.split("/")
                    vocal = f'{vocals_folder}/{file_name}.{vocals_extension}'
                    command = f"rm {vocal}"
                    os.system(command)
        print('*'*NUMBER)
        print("\n\n")
    progress_bar.update(1)
    ################## Concatenate transcriptions ##################
    if CONCATENATE_TRANSCRIPTIONS:
        print('*'*NUMBER)
        print("Concatenate transcriptions")
        python_file = "concat_transcriptions.py"
        command = f"python {python_file} {chunck_file} {SECONDS} {speakers_file}"
        os.system(command)
        if REMOVE_FILES:
            # Delete the per-chunk .srt file of every listed chunk.
            with open(chunck_file, 'r') as f:
                files = f.read().splitlines()
            for file in files:
                file_name, _ = file.split(".")
                _, file_name = file_name.split("/")
                transcriptions_folder = "transcriptions"
                transcription_extension = "srt"
                command = f"rm {transcriptions_folder}/{file_name}.{transcription_extension}"
                os.system(command)
        print('*'*NUMBER)
        print("\n\n")
    progress_bar.update(1)
    ################## Translate transcription ##################
    # Defined outside the stage switch because the subtitle stage builds a file name from it.
    target_languaje = "Español"
    if TRANSLATE_TRANSCRIPTIONS:
        print('*'*NUMBER)
        print("Translate transcription")
        transcription_file = "concatenated_transcriptions/download_audio.srt"
        source_languaje = "English"
        python_file = "translate_transcriptions.py"
        command = f"python {python_file} {transcription_file} --source_languaje {source_languaje} --target_languaje {target_languaje} --device {DEVICE}"
        os.system(command)
        if REMOVE_FILES:
            command = f"rm {transcription_file}"
            os.system(command)
        print('*'*NUMBER)
        print("\n\n")
    progress_bar.update(1)
    ################## Add subtitles to video ##################
    if ADD_SUBTITLES_TO_VIDEO:
        print('*'*NUMBER)
        print("Add subtitles to video")
        python_file = "add_subtitles_to_video.py"
        transcription_file = f"translated_transcriptions/download_audio_{target_languaje}.srt"
        input_video_file = "videos/download_video.mp4"
        input_audio_file = "audios/download_audio.mp3"
        command = f"python {python_file} {transcription_file} {input_video_file} {input_audio_file}"
        os.system(command)
        if REMOVE_FILES:
            # Final clean-up: downloaded media, translated subtitles and the two bookkeeping files.
            command = f"rm {input_video_file}"
            os.system(command)
            command = f"rm {input_audio_file}"
            os.system(command)
            command = f"rm {transcription_file}"
            os.system(command)
            command = f"rm chunks/output_files.txt"
            os.system(command)
            command = f"rm vocals/speakers.txt"
            os.system(command)
        print('*'*NUMBER)
        print("\n\n")
    progress_bar.update(1)
def remove_all_files():
    """Delete every working folder of the pipeline, together with its contents.

    The folders are removed relative to the current working directory. A
    folder that does not exist is skipped silently, so the function can be
    called at any time, including on a clean checkout.
    """
    # Imported here so the function does not depend on the file's import block.
    import shutil

    work_folders = (
        "audios",
        "chunks",
        "concatenated_transcriptions",
        "transcriptions",
        "translated_transcriptions",
        "videos",
        "vocals",
    )
    for folder in work_folders:
        # shutil.rmtree replaces the former `rm -r` shell call: it works on
        # every platform and ignore_errors keeps a missing folder from failing.
        shutil.rmtree(folder, ignore_errors=True)
# # def copy_url_from_clipboard():
# # return pyperclip.paste()
def reset_frontend():
    """Clear the URL box and hide every other widget of the UI.

    Returns:
        A 14-item tuple that is aligned, position by position, with the
        ``outputs`` list of ``delete_button.click`` in ``subtify()``: the new
        (empty) URL text, then one hidden-component update for the image, the
        three dropdowns, the subtify button, the auxiliary textbox, the six
        progress textboxes and the subtitled video.
    """
    visible = False
    return (
        "",                            # url_textbox
        gr.Image(visible=visible),     # image
        gr.Dropdown(visible=visible),  # source_languaje
        gr.Dropdown(visible=visible),  # target_languaje
        gr.Dropdown(visible=visible),  # number_of_speakers
        gr.Button(visible=visible),    # subtify_button
        gr.Textbox(visible=visible),   # auxiliar_block
        gr.Textbox(visible=visible),   # video_donwloaded_progress_info
        gr.Textbox(visible=visible),   # video_sliced_progress_info
        gr.Textbox(visible=visible),   # video_transcribed_progress_info
        gr.Textbox(visible=visible),   # transcriptions_concatenated_progress_info
        gr.Textbox(visible=visible),   # video_translated_progress_info
        gr.Textbox(visible=visible),   # video_subtitled_progress_info
        gr.Video(visible=visible),     # subtitled_video
    )
def get_youtube_thumbnail(url):
    """Return the thumbnail URL that pytube reports for the video at *url*."""
    return YouTube(url).thumbnail_url
def is_valid_youtube_url(url):
    """Return True when *url* is a YouTube video link and nothing but that link.

    Accepted formats (scheme and ``www.`` are optional):
      - https://youtube.com/watch?v=video_id
      - https://www.youtube.com/watch?v=video_id
      - https://youtu.be/video_id
    each optionally followed by further query parameters or a fragment made
    only of URL-safe characters (for example ``&t=10s``).

    Surrounding whitespace is ignored. The whole string has to match, so
    input with trailing text such as ``"...watch?v=abc; rm -rf /"`` is
    rejected; this matters because the URL is later passed to a helper script.
    """
    patron_youtube = (
        r'(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/)[\w-]+'
        # optional tail: extra query parameters / fragment, URL-safe characters only
        r'(?:[?&#][\w\-.~%=&+,:/@#?]*)?'
        # anchor at the very end so nothing may follow the link
        r'\Z'
    )
    return bool(re.match(patron_youtube, url.strip()))
def is_valid_twitch_url(url):
    """Return True when *url* is a Twitch link and nothing but that link.

    Accepted formats (scheme and ``www.`` are optional):
      - https://twitch.tv/channel_name
      - https://www.twitch.tv/channel_name
      - https://twitch.tv/videos/video_id
    each optionally followed by a further path, query or fragment made only
    of URL-safe characters (for example ``?t=0h1m2s``).

    Surrounding whitespace is ignored. The whole string has to match, so
    input with trailing text such as ``"twitch.tv/videos/1; rm -rf /"`` is
    rejected; this matters because the URL is later passed to a helper script.
    """
    twitch_pattern = (
        r'(https?://)?(www\.)?twitch\.tv/(videos/\d+|\w+)'
        # optional tail: extra path / query / fragment, URL-safe characters only
        r'(?:[/?&#][\w\-.~%=&+,:/@#?]*)?'
        # anchor at the very end so nothing may follow the link
        r'\Z'
    )
    return bool(re.match(twitch_pattern, url.strip()))
def is_valid_url(url):
    """Show or hide the preview image and the option widgets according to *url*.

    Returns a 5-item tuple of component updates: image, source-language
    dropdown, target-language dropdown, number-of-speakers dropdown and the
    subtify button. For a valid YouTube URL the image is the video thumbnail
    (or a placeholder when no thumbnail is reported); for a valid Twitch URL
    it is the Twitch asset. In every other case all five components are
    returned hidden.
    """
    speaker_choices = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]

    def build_controls(shown):
        # The three dropdowns and the button, all with the same visibility.
        return (
            gr.Dropdown(visible=shown, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True),
            gr.Dropdown(visible=shown, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True),
            gr.Dropdown(visible=shown, label="Number of speakers", show_label=True, value=10, choices=speaker_choices, scale=1, interactive=True),
            gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=shown),
        )

    def preview(picture, shown=True):
        # Preview image without download button or container.
        return gr.Image(value=picture, visible=shown, show_download_button=False, container=False)

    controls = build_controls(True)
    lowered = url.lower()

    if "youtube" in lowered or "youtu.be" in lowered:
        # YouTube
        if is_valid_youtube_url(url):
            thumbnail = get_youtube_thumbnail(url)
            picture = thumbnail if thumbnail else "assets/youtube-no-thumbnails.webp"
            return (preview(picture), *controls)
    elif "twitch" in lowered:
        # Twitch
        if is_valid_twitch_url(url):
            return (preview("assets/twitch.webp"), *controls)

    # Error: unknown site or malformed URL, so everything is hidden.
    return (preview("assets/youtube_error.webp", False), *build_controls(False))
def change_visibility_texboxes():
    """Write "Done" into the first textbox and make the six following ones visible.

    Returns a 7-item tuple of textbox updates, in that order.
    """
    done_marker = gr.Textbox(value="Done")
    shown_boxes = [gr.Textbox(visible=True) for _ in range(6)]
    return (done_marker, *shown_boxes)
def get_audio_and_video_from_video(url):
    """Download the audio and video of *url* by running download.py.

    Args:
        url: Video URL typed by the user in the UI.

    Returns:
        A 3-item tuple of textbox updates: the status text "Ok", the audio
        path "audios/download_audio.mp3" and the video path
        "videos/download_video.mp4". The exit status of download.py is not
        checked.
    """
    # Imported here so the function does not depend on the file's import block.
    import subprocess

    python_file = "download.py"
    # The URL is user input: pass it as one argv entry instead of pasting it
    # into a shell string, so characters such as '&', ';' or '$(...)' are never
    # interpreted by a shell (a plain '&t=10s' already broke the old command).
    subprocess.run(["python", python_file, url.strip()], check=False)
    sleep(1)

    audio = "audios/download_audio.mp3"
    video = "videos/download_video.mp4"
    return (
        gr.Textbox(value="Ok"),
        gr.Textbox(value=audio),
        gr.Textbox(value=video),
    )
def slice_audio(audio_path):
    """Run slice_audio.py on *audio_path* and move the resulting chunks to the vocals folder.

    Also writes "0" to vocals/speakers.txt. Returns a textbox update with the
    status text "Ok".
    """
    vocals_dir = "vocals"
    chunks_dir = "chunks"
    for directory in (vocals_dir, chunks_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    script = "slice_audio.py"
    os.system(f"python {script} {audio_path} {SECONDS}")

    # Speaker count 0 is recorded before the chunks are moved.
    with open(f"{vocals_dir}/speakers.txt", 'w') as speakers_handle:
        speakers_handle.write(str(0))

    os.system(f"mv {chunks_dir}/*.mp3 {vocals_dir}/")

    return gr.Textbox(value="Ok")
def trascribe_audio(source_languaje):
    """Run transcribe.py over the audio chunks, then delete the chunk audio files.

    Args:
        source_languaje: Language selected in the UI; forwarded to
            transcribe.py as its second argument.

    Returns:
        A textbox update with the status text "Ok".

    The list of chunks is read from chunks/output_files.txt and the speaker
    count from the first line of vocals/speakers.txt. Each listed path is
    expected to contain exactly one "." and one "/".
    """
    # Imported here so the function does not depend on the file's import block.
    import subprocess

    folder_vocals = "vocals"
    python_file = "transcribe.py"
    chunck_file = "chunks/output_files.txt"
    speakers_file = "vocals/speakers.txt"

    # The language comes from the UI: pass every value as its own argv entry
    # so a name containing spaces or shell metacharacters reaches the script
    # unchanged instead of being split or interpreted by a shell.
    subprocess.run(
        [
            "python",
            python_file,
            chunck_file,
            str(source_languaje),
            speakers_file,
            DEVICE,
            str(not SEPARE_VOCALS),
        ],
        check=False,
    )

    with open(chunck_file, 'r') as f:
        files = f.read().splitlines()
    with open(speakers_file, 'r') as f:
        speakers = int(f.read().splitlines()[0])

    for file in files:
        # "chunks/name.mp3" -> "name"
        file_name, _ = file.split(".")
        _, file_name = file_name.split("/")
        if speakers > 0:
            # One .wav per speaker and chunk.
            vocals = [f'{folder_vocals}/{file_name}_speaker{i:003d}.wav' for i in range(speakers)]
        else:
            # Speaker count 0: a single .mp3 per chunk.
            vocals = [f'{folder_vocals}/{file_name}.mp3']
        for vocal in vocals:
            os.system(f"rm {vocal}")

    return gr.Textbox(value="Ok")
def concatenate_transcriptions():
    """Run concat_transcriptions.py, then delete the per-chunk .srt files.

    Returns a 2-item tuple of textbox updates: the status text "Ok" and the
    path "concatenated_transcriptions/download_audio.srt".
    """
    output_dir = "concatenated_transcriptions"
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    chunk_list = "chunks/output_files.txt"
    speakers_list = "vocals/speakers.txt"
    script = "concat_transcriptions.py"
    os.system(f"python {script} {chunk_list} {SECONDS} {speakers_list}")

    with open(chunk_list, 'r') as listing:
        chunk_paths = listing.read().splitlines()

    srt_dir = "transcriptions"
    srt_ext = "srt"
    for chunk_path in chunk_paths:
        # "chunks/name.mp3" -> "name"; expects exactly one "." and one "/".
        stem, _ = chunk_path.split(".")
        _, stem = stem.split("/")
        os.system(f"rm {srt_dir}/{stem}.{srt_ext}")

    return (
        gr.Textbox(value="Ok"),
        gr.Textbox(value="concatenated_transcriptions/download_audio.srt"),
    )
def translate_transcription(original_audio_transcribed_path, source_languaje, target_languaje):
    """Run translate_transcriptions.py on the transcription, then delete the untranslated file.

    Args:
        original_audio_transcribed_path: Path of the .srt file to translate.
        source_languaje: Language selected in the UI as the video language.
        target_languaje: Language selected in the UI for the subtitles.

    Returns:
        A 2-item tuple of textbox updates: the status text "Ok" and the path
        "translated_transcriptions/download_audio_<target_languaje>.srt".
    """
    # Imported here so the function does not depend on the file's import block.
    import subprocess

    folder_translated_transcriptions = "translated_transcriptions"
    if not os.path.exists(folder_translated_transcriptions):
        os.makedirs(folder_translated_transcriptions)

    python_file = "translate_transcriptions.py"
    # Both languages come from the UI: pass every value as its own argv entry
    # so a name containing spaces or shell metacharacters reaches the script
    # unchanged instead of being split or interpreted by a shell.
    subprocess.run(
        [
            "python",
            python_file,
            str(original_audio_transcribed_path),
            "--source_languaje",
            str(source_languaje),
            "--target_languaje",
            str(target_languaje),
            "--device",
            DEVICE,
        ],
        check=False,
    )

    translated_transcription = f"translated_transcriptions/download_audio_{target_languaje}.srt"

    # The file that is removed is this fixed path, not the path received as argument.
    transcription_file = "concatenated_transcriptions/download_audio.srt"
    command = f"rm {transcription_file}"
    os.system(command)

    return (
        gr.Textbox(value="Ok"),
        gr.Textbox(value=translated_transcription)
    )
def add_translated_subtitles_to_video(original_video_path, original_audio_path, original_audio_translated_path):
    """Run add_subtitles_to_video.py, then delete the inputs and the bookkeeping files.

    Returns a 2-item tuple: a textbox update with the status text "Ok" and a
    visible video update pointing at "videos/download_video_with_subtitles.mp4".
    """
    script = "add_subtitles_to_video.py"
    os.system(f"python {script} {original_audio_translated_path} {original_video_path} {original_audio_path}")

    # Removed in this order: video, audio, translated subtitles, chunk list, speaker count.
    leftovers = (
        original_video_path,
        original_audio_path,
        original_audio_translated_path,
        "chunks/output_files.txt",
        "vocals/speakers.txt",
    )
    for leftover in leftovers:
        os.system(f"rm {leftover}")

    return (
        gr.Textbox(value="Ok"),
        gr.Video(value="videos/download_video_with_subtitles.mp4", visible=True),
    )
def subtify():
    """Build the Gradio interface, wire the pipeline steps together and launch the app.

    The pipeline is driven by ``change`` events: each step writes a status
    text into the next progress textbox, and that change triggers the
    following step (download -> slice -> transcribe -> concatenate ->
    translate -> add subtitles).
    """
    # NOTE(review): the nesting of the layout below was reconstructed from a
    # copy of the file that had lost its indentation -- confirm against the repository.
    with gr.Blocks() as demo:
        num_speaker = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
        # Layout
        gr.Markdown("""# Subtify""")
        with gr.Row(variant="panel"):
            url_textbox = gr.Textbox(placeholder="Add video URL here", label="Video URL", elem_id="video_url", scale=1, interactive=True)
            # copy_button = gr.Button(size="sm", icon="icons/copy.svg", value="", min_width="10px", scale=0)
            delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="", min_width="10px", scale=0)
        # Everything below the URL row is created hidden.
        visible = False
        with gr.Row(equal_height=False):
            image = gr.Image(visible=visible, scale=1)
            with gr.Column():
                with gr.Row():
                    source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True, info="Language of the video")
                    target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True, info="Language to translate the subtitles")
                    number_of_speakers = gr.Dropdown(visible=visible, label="Number of speakers", show_label=True, value=10, choices=num_speaker, scale=1, interactive=True, info="Number of speakers in the video, if you don't know, select 10")
                with gr.Row():
                    subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=visible)
        # Hidden textbox whose change event starts the pipeline (see Events below).
        auxiliar_block = gr.Textbox(placeholder="Waiting", label="Auxiliar block", elem_id="auxiliar_block", interactive=False, visible=visible)
        with gr.Row():
            video_donwloaded_progress_info = gr.Textbox(placeholder="Waiting", label="Video downloaded progress info", elem_id="video_donwloaded_progress_info", interactive=False, visible=visible)
            video_sliced_progress_info = gr.Textbox(placeholder="Waiting", label="Video sliced progress info", elem_id="video_sliced_progress_info", interactive=False, visible=visible)
            video_transcribed_progress_info = gr.Textbox(placeholder="Waiting", label="Video transcribed progress info", elem_id="video_transcribed_progress_info", interactive=False, visible=visible)
            transcriptions_concatenated_progress_info = gr.Textbox(placeholder="Waiting", label="Transcriptions concatenated progress info", elem_id="transcriptions_concatenated_progress_info", interactive=False, visible=visible)
            video_translated_progress_info = gr.Textbox(placeholder="Waiting", label="Transcription translated progress info", elem_id="transcription_translated_progress_info", interactive=False, visible=visible)
            video_subtitled_progress_info = gr.Textbox(placeholder="Waiting", label="Video subtitled progress info", elem_id="video_subtitled_progress_info", interactive=False, visible=visible)
        # Hidden textboxes that carry file paths from one pipeline step to the next.
        original_audio_path = gr.Textbox(label="Original audio path", elem_id="original_audio_path", visible=visible)
        original_video_path = gr.Textbox(label="Original video path", elem_id="original_video_path", visible=visible)
        original_audio_transcribed_path = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", visible=visible)
        original_audio_translated_path = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", visible=visible)
        subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=visible)
        # Events
        # copy_button.click(fn=copy_url_from_clipboard, outputs=url_textbox)
        # NOTE(review): this list has 14 entries; the tuple returned by
        # reset_frontend should have one value per entry, in the same order -- verify.
        delete_button.click(
            fn=reset_frontend,
            outputs=[
                url_textbox,
                image,
                source_languaje,
                target_languaje,
                number_of_speakers,
                subtify_button,
                auxiliar_block,
                video_donwloaded_progress_info,
                video_sliced_progress_info,
                video_transcribed_progress_info,
                transcriptions_concatenated_progress_info,
                video_translated_progress_info,
                video_subtitled_progress_info,
                subtitled_video,
            ]
        )
        # Every change of the URL re-validates it and shows or hides the option widgets.
        url_textbox.change(
            fn=is_valid_url,
            inputs=url_textbox,
            outputs=[image, source_languaje, target_languaje, number_of_speakers, subtify_button]
        )
        # The button writes "Done" into auxiliar_block and reveals the progress textboxes.
        subtify_button.click(
            fn=change_visibility_texboxes,
            outputs=[auxiliar_block, video_donwloaded_progress_info, video_sliced_progress_info, video_transcribed_progress_info, transcriptions_concatenated_progress_info, video_translated_progress_info, video_subtitled_progress_info]
        )
        # Step 1: download audio and video.
        auxiliar_block.change(
            fn=get_audio_and_video_from_video,
            inputs=[url_textbox],
            outputs=[video_donwloaded_progress_info, original_audio_path, original_video_path]
        )
        # Step 2: slice the audio.
        video_donwloaded_progress_info.change(
            fn=slice_audio,
            inputs=[original_audio_path],
            outputs=[video_sliced_progress_info]
        )
        # Step 3: transcribe the chunks.
        video_sliced_progress_info.change(
            fn=trascribe_audio,
            inputs=[source_languaje],
            outputs=[video_transcribed_progress_info]
        )
        # Step 4: concatenate the per-chunk transcriptions.
        video_transcribed_progress_info.change(
            fn=concatenate_transcriptions,
            outputs=[transcriptions_concatenated_progress_info, original_audio_transcribed_path]
        )
        # Step 5: translate the concatenated transcription.
        transcriptions_concatenated_progress_info.change(
            fn=translate_transcription,
            inputs=[original_audio_transcribed_path, source_languaje, target_languaje],
            outputs=[video_translated_progress_info, original_audio_translated_path]
        )
        # Step 6: add the translated subtitles to the video and show the result.
        video_translated_progress_info.change(
            fn=add_translated_subtitles_to_video,
            inputs=[original_video_path, original_audio_path, original_audio_translated_path],
            outputs=[video_subtitled_progress_info, subtitled_video]
        )
    demo.launch()
if __name__ == "__main__":
    # Command-line entry point with two optional switches.
    cli = argparse.ArgumentParser()
    for flag in ("--no_ui", "--remove_all_files"):
        cli.add_argument(flag, action="store_true")
    options = cli.parse_args()

    # --no_ui wins over --remove_all_files; with neither switch the Gradio UI is started.
    if options.no_ui:
        entry_point = subtify_no_ui
    elif options.remove_all_files:
        entry_point = remove_all_files
    else:
        entry_point = subtify
    entry_point()