Spaces:
Running
Running
File size: 4,658 Bytes
787cd7d 7a6c0da fa786d6 7a6c0da fa786d6 7a6c0da fa786d6 7a6c0da fa786d6 7a6c0da fa786d6 8118032 fa786d6 7a6c0da fe5350d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
import csv
import datetime
import requests
import gradio as gr
import pandas as pd
from io import BytesIO
from pathlib import Path
from urllib.parse import urlparse
from pydub import AudioSegment, silence
def format_seconds(secs):
    """Format a duration in seconds as ``MM:SS.mmm`` (millisecond precision).

    Negative inputs are clamped to zero: the caller subtracts an
    ``ms_before`` padding from clip start times, which can go below 0 for a
    clip at the very beginning of the audio — the original code raised
    OverflowError in that case.
    """
    secs = max(secs, 0)
    t = datetime.datetime(
        year=1, month=1, day=1, hour=0, minute=0
    ) + datetime.timedelta(seconds=secs)
    # %f gives microseconds; drop the last three digits to keep milliseconds.
    return t.strftime("%M:%S.%f")[:-3]
def get_filename_and_extension(url):
    """Split the path component of *url* into its file-name pieces.

    Returns a 3-tuple ``(filename, stem, suffix)``, e.g. for
    ``https://host/a/video.mp4`` -> ``("video.mp4", "video", ".mp4")``.
    Query strings and fragments are ignored via urlparse.
    """
    url_path = Path(urlparse(url).path)
    return url_path.name, url_path.stem, url_path.suffix
def calculate_times(input_url, input_text, ms_before, ms_after):
    """Detect non-silent clips in the audio at *input_url* and pair each one
    with the corresponding line of *input_text*.

    Parameters
    ----------
    input_url : str  — URL of the media file to download and analyse.
    input_text : str — one expected clip text per line.
    ms_before / ms_after : int — padding (milliseconds) subtracted from each
        clip start / added to each clip stop.

    Returns
    -------
    (text, tsv_path_or_None, dataframe) — the tab-separated clip listing, the
    path of the written ``clips.tsv`` (``None`` on mismatch), and a DataFrame
    with columns text/start/stop/file.
    """
    columns = ["text", "start", "stop", "file"]
    _, _, file_extension = get_filename_and_extension(input_url)
    # pydub wants the bare format name ("mp4"), not the dotted suffix (".mp4").
    file_extension = file_extension.lstrip(".")
    res = requests.get(input_url)
    # Fail fast on HTTP errors — otherwise an HTML error page would be fed to
    # pydub and produce a confusing decode failure.
    res.raise_for_status()
    audio = AudioSegment.from_file(BytesIO(res.content), file_extension)
    # A pause of >= 1.25 s below -80 dBFS separates two clips.
    non_silent_parts = silence.detect_nonsilent(
        audio, min_silence_len=1250, silence_thresh=-80
    )
    segments = [
        (
            format_seconds((start - ms_before) / 1000),
            format_seconds((stop + ms_after) / 1000),
        )
        for start, stop in non_silent_parts
    ]
    lines = input_text.splitlines()
    if len(lines) != len(segments):
        msg = f"DETECTED CLIPS AND INPUT LINES DO NOT MATCH!\n\nYou are expecting {len(lines)} clips BUT {len(segments)} segments have been found in the video file.\n\nPlease, review the list of clips or transcribe the audio to check the clips.\n\nUSEFUL FREE TOOLS:\n\nTranscribe audio to VTT file\nhttps://replicate.com/openai/whisper\n\nVTT file viewer\nhttps://www.happyscribe.com/subtitle-tools/online-subtitle-editor/free"
        # Single blank row so the UI DataFrame renders an empty grid.
        df = pd.DataFrame([["", "", "", ""]], columns=columns)
        return msg, None, df
    # Build all rows first, then create the DataFrame once — appending with
    # df.loc[len(df.index)] re-allocates on every iteration.
    rows = []
    out_lines = []
    for raw_line, (start, stop) in zip(lines, segments):
        line = raw_line.rstrip()
        out_lines.append(f"{line}\t{start}\t{stop}\t{input_url}")
        rows.append([line, start, stop, input_url])
    df = pd.DataFrame(rows, columns=columns)
    df.to_csv(
        "clips.tsv",
        sep="\t",
        encoding="utf-8",
        index=False,
        header=False,
        quoting=csv.QUOTE_NONE,
    )
    return "\n".join(out_lines), "clips.tsv", df
def load_video(input_url):
    """Return the URL to feed the video player, or None when the box is empty."""
    return input_url if input_url else None
# Stylesheet for required input boxes. CSS declarations are separated by
# semicolons — the original used a comma, which made the rule invalid and
# silently dropped by the browser.
css = """
.required {background-color: #FFCCCB !important; font-size: 24px !important}
"""
# Gradio UI: a two-column layout — inputs and controls on the left,
# results (downloadable TSV, copyable text, DataFrame grid) on the right.
with gr.Blocks(title="Start and stop times", css=css) as app:
    gr.Markdown(
        """# Start and stop times generator
Please, fill the Video URL and Clip texts textboxes and click the Run button"""
    )
    with gr.Row():
        with gr.Column(scale=3):
            # Required inputs get the .required CSS class for highlighting.
            text1 = gr.Textbox(
                lines=1,
                placeholder="Video URL...",
                label="Video URL",
                elem_classes=["required"],
            )
            text2 = gr.Textbox(
                lines=5,
                max_lines=10,
                placeholder="List of clip texts...",
                label="Clip texts",
                elem_classes=["required"],
            )
            # Padding sliders feed ms_before / ms_after of calculate_times.
            slider1 = gr.Slider(
                minimum=0,
                maximum=1000,
                step=50,
                value=0,
                label="Milliseconds BEFORE each clip",
            )
            slider2 = gr.Slider(
                minimum=0,
                maximum=1000,
                step=50,
                value=500,
                label="Milliseconds AFTER each clip",
            )
            btn_submit = gr.Button(value="Run", variant="primary", size="sm")
            # Preview player, filled by load_video when the URL box loses focus.
            video = gr.Video(
                format="mp4", label="Video file", show_label=True, interactive=False
            )
        with gr.Column(scale=5):
            # Output widgets — all read-only; populated by calculate_times.
            file = gr.File(
                label="Clips", show_label=True, file_count=1, interactive=False
            )
            lines = gr.Textbox(
                lines=10, label="Clips", interactive=False, show_copy_button=True
            )
            data = gr.Dataframe(
                label="Clips",
                headers=["text", "start", "stop", "file"],
                datatype=["str", "str", "str", "str"],
                row_count=0,
            )
    # Run button -> clip detection; outputs go to the three result widgets.
    btn_submit.click(
        calculate_times,
        inputs=[text1, text2, slider1, slider2],
        outputs=[lines, file, data],
    )
    # Leaving the URL textbox loads the video into the preview player.
    text1.blur(load_video, inputs=[text1], outputs=[video])
app.launch()
|