import csv
import datetime
from io import BytesIO
from pathlib import Path
from urllib.parse import urlparse

import gradio as gr
import pandas as pd
import requests
from pydub import AudioSegment, silence


def format_seconds(secs):
    """Format a duration in seconds as MM:SS.mmm."""
    t = datetime.datetime(
        year=1, month=1, day=1, hour=0, minute=0
    ) + datetime.timedelta(seconds=secs)
    return t.strftime("%M:%S.%f")[:-3]


def get_filename_and_extension(url):
    """Return (filename, stem, suffix) for the path component of a URL."""
    parsed_url = urlparse(url)
    path = parsed_url.path
    filename = Path(path).name
    filename_without_extension = Path(filename).stem
    file_extension = Path(filename).suffix
    return filename, filename_without_extension, file_extension


def calculate_times(input_url, input_text, ms_before, ms_after):
    """Download the media file, detect non-silent segments, and pair each
    segment with one line of the input text to build a clips table."""
    _, _, file_extension = get_filename_and_extension(input_url)
    file_extension = file_extension.replace(".", "")
    res = requests.get(input_url)
    audio = AudioSegment.from_file(BytesIO(res.content), format=file_extension)

    # Anything quieter than -80 dBFS for at least 1.25 s is treated as the gap
    # between clips.
    non_silent_parts = silence.detect_nonsilent(
        audio, min_silence_len=1250, silence_thresh=-80
    )
    segments = [
        (
            # Clamp at 0 so a clip near the start of the file cannot produce a
            # negative timestamp.
            format_seconds(max(0, start - ms_before) / 1000),
            format_seconds((stop + ms_after) / 1000),
        )
        for start, stop in non_silent_parts
    ]

    df = pd.DataFrame({"text": [], "start": [], "stop": [], "file": []})
    lines = input_text.splitlines()
    if len(lines) != len(segments):
        msg = (
            "DETECTED CLIPS AND INPUT LINES DO NOT MATCH!\n\n"
            f"You are expecting {len(lines)} clips BUT {len(segments)} segments "
            "have been found in the video file.\n\n"
            "Please review the list of clips or transcribe the audio to check the clips.\n\n"
            "USEFUL FREE TOOLS:\n\n"
            "Transcribe audio to VTT file\n"
            "https://replicate.com/openai/whisper\n\n"
            "VTT file viewer\n"
            "https://www.happyscribe.com/subtitle-tools/online-subtitle-editor/free"
        )
        df.loc[len(df.index)] = ["", "", "", ""]
        return msg, None, df

    rows = []
    for i in range(len(segments)):
        line = lines[i].rstrip()
        rows.append(f"{line}\t{segments[i][0]}\t{segments[i][1]}\t{input_url}")
        df.loc[len(df.index)] = [line, segments[i][0], segments[i][1], input_url]
    df.to_csv(
        "clips.tsv",
        sep="\t",
        encoding="utf-8",
        index=False,
        header=False,
        quoting=csv.QUOTE_NONE,
    )
    return "\n".join(rows), "clips.tsv", df


def load_video(input_url):
    """Show the video in the preview player once a URL has been entered."""
    if input_url:
        return input_url
    return None


css = """
.required {background-color: #FFCCCB !important; font-size: 24px !important}
"""

with gr.Blocks(title="Start and stop times", css=css) as app:
    gr.Markdown(
        """# Start and stop times generator
Please fill in the Video URL and Clip texts textboxes and click the Run button"""
    )
    with gr.Row():
        with gr.Column(scale=3):
            text1 = gr.Textbox(
                lines=1,
                placeholder="Video URL...",
                label="Video URL",
                elem_classes=["required"],
            )
            text2 = gr.Textbox(
                lines=5,
                max_lines=10,
                placeholder="List of clip texts...",
                label="Clip texts",
                elem_classes=["required"],
            )
            slider1 = gr.Slider(
                minimum=0,
                maximum=1000,
                step=50,
                value=0,
                label="Milliseconds BEFORE each clip",
            )
            slider2 = gr.Slider(
                minimum=0,
                maximum=1000,
                step=50,
                value=500,
                label="Milliseconds AFTER each clip",
            )
            btn_submit = gr.Button(value="Run", variant="primary", size="sm")
            video = gr.Video(
                format="mp4", label="Video file", show_label=True, interactive=False
            )
        with gr.Column(scale=5):
            file = gr.File(
                label="Clips", show_label=True, file_count="single", interactive=False
            )
            lines = gr.Textbox(
                lines=10, label="Clips", interactive=False, show_copy_button=True
            )
            data = gr.Dataframe(
                label="Clips",
                headers=["text", "start", "stop", "file"],
                datatype=["str", "str", "str", "str"],
                row_count=0,
            )

    btn_submit.click(
        calculate_times,
        inputs=[text1, text2, slider1, slider2],
        outputs=[lines, file, data],
    )
    text1.blur(load_video, inputs=[text1], outputs=[video])

app.launch()