File size: 4,658 Bytes
787cd7d
7a6c0da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa786d6
7a6c0da
 
fa786d6
 
 
7a6c0da
 
 
 
 
 
fa786d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7a6c0da
fa786d6
 
 
7a6c0da
fa786d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8118032
fa786d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7a6c0da
fe5350d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import csv
import datetime
import requests
import gradio as gr
import pandas as pd
from io import BytesIO
from pathlib import Path
from urllib.parse import urlparse
from pydub import AudioSegment, silence


def format_seconds(secs):
    """Render a duration in seconds as an ``MM:SS.mmm`` string.

    The value is added to ``datetime.min`` (year 1, 00:00) so ``strftime``
    can do the formatting; the trailing three digits of ``%f`` (microseconds)
    are sliced off to leave milliseconds.
    """
    stamp = datetime.datetime.min + datetime.timedelta(seconds=secs)
    # "%f" yields six digits; drop the last three to keep milliseconds only.
    return stamp.strftime("%M:%S.%f")[:-3]


def get_filename_and_extension(url):
    """Split a URL into (filename, stem, suffix).

    Query strings and fragments are ignored because only the parsed URL
    path is inspected, e.g. ``.../clip.mp4?t=1`` -> ``("clip.mp4",
    "clip", ".mp4")``.
    """
    filename = Path(urlparse(url).path).name
    return filename, Path(filename).stem, Path(filename).suffix


def calculate_times(input_url, input_text, ms_before, ms_after):
    """Detect non-silent clips in a remote media file and pair each one
    with a line of ``input_text``.

    Parameters
    ----------
    input_url : str
        URL of the audio/video file; its extension tells pydub the format.
    input_text : str
        One clip transcript per line; the line count must match the number
        of detected clips.
    ms_before, ms_after : int
        Padding in milliseconds subtracted from each clip start / added to
        each clip stop.

    Returns
    -------
    tuple
        ``(text, file, df)`` where ``text`` is either the tab-separated clip
        lines or an error message, ``file`` is the path to the written
        ``clips.tsv`` (or ``None`` on mismatch), and ``df`` is the clips
        DataFrame shown in the UI.
    """
    _, _, file_extension = get_filename_and_extension(input_url)
    file_extension = file_extension.replace(".", "")
    # NOTE(review): requests.get has no default timeout; consider adding one
    # if the UI should not hang indefinitely on a dead host.
    response = requests.get(input_url)
    # Fail fast on HTTP errors instead of feeding an error page to pydub.
    response.raise_for_status()
    audio = AudioSegment.from_file(BytesIO(response.content), file_extension)
    non_silent_parts = silence.detect_nonsilent(
        audio, min_silence_len=1250, silence_thresh=-80
    )
    segments = [
        (
            # Clamp at 0: padding before the very first clip must not make
            # the timestamp negative (format_seconds would overflow).
            format_seconds(max(0, start - ms_before) / 1000),
            format_seconds((stop + ms_after) / 1000),
        )
        for start, stop in non_silent_parts
    ]
    df = pd.DataFrame({"text": [], "start": [], "stop": [], "file": []})
    lines = input_text.splitlines()
    if len(lines) != len(segments):
        msg = f"DETECTED CLIPS AND INPUT LINES DO NOT MATCH!\n\nYou are expecting {len(lines)} clips BUT {len(segments)} segments have been found in the video file.\n\nPlease, review the list of clips or transcribe the audio to check the clips.\n\nUSEFUL FREE TOOLS:\n\nTranscribe audio to VTT file\nhttps://replicate.com/openai/whisper\n\nVTT file viewer\nhttps://www.happyscribe.com/subtitle-tools/online-subtitle-editor/free"
        df.loc[len(df.index)] = ["", "", "", ""]
        return msg, None, df
    rows = []
    for line, (start, stop) in zip(lines, segments):
        line = line.rstrip()
        rows.append(f"{line}\t{start}\t{stop}\t{input_url}")
        df.loc[len(df.index)] = [line, start, stop, input_url]
    df.to_csv(
        "clips.tsv",
        sep="\t",
        encoding="utf-8",
        index=False,
        header=False,
        quoting=csv.QUOTE_NONE,
    )
    return "\n".join(rows), "clips.tsv", df


def load_video(input_url):
    """Mirror the URL textbox into the video player; ``None`` clears it."""
    return input_url or None


# CSS injected into the Gradio app to highlight required fields.
# BUG FIX: CSS declarations are separated by ";", not "," — the original
# comma made the whole rule invalid, so neither declaration was applied.
css = """
.required {background-color: #FFCCCB !important; font-size: 24px !important}
"""


# ---------------------------------------------------------------------------
# Gradio UI: two-column layout — inputs (URL, clip texts, padding sliders,
# video preview) on the left, results (TSV file, copyable text, dataframe)
# on the right.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Start and stop times", css=css) as app:
    gr.Markdown(
        """# Start and stop times generator
    Please, fill the Video URL and Clip texts textboxes and click the Run button"""
    )
    with gr.Row():
        with gr.Column(scale=3):
            # Required inputs — styled via the ".required" CSS class.
            text1 = gr.Textbox(
                lines=1,
                placeholder="Video URL...",
                label="Video URL",
                elem_classes=["required"],
            )
            text2 = gr.Textbox(
                lines=5,
                max_lines=10,
                placeholder="List of clip texts...",
                label="Clip texts",
                elem_classes=["required"],
            )
            # Padding applied around each detected clip, in milliseconds.
            slider1 = gr.Slider(
                minimum=0,
                maximum=1000,
                step=50,
                value=0,
                label="Milliseconds BEFORE each clip",
            )
            slider2 = gr.Slider(
                minimum=0,
                maximum=1000,
                step=50,
                value=500,
                label="Milliseconds AFTER each clip",
            )
            btn_submit = gr.Button(value="Run", variant="primary", size="sm")
            # Read-only preview of the video referenced by the URL textbox.
            video = gr.Video(
                format="mp4", label="Video file", show_label=True, interactive=False
            )
        with gr.Column(scale=5):
            # Outputs: downloadable clips.tsv, copyable text, and a table.
            file = gr.File(
                label="Clips", show_label=True, file_count=1, interactive=False
            )
            lines = gr.Textbox(
                lines=10, label="Clips", interactive=False, show_copy_button=True
            )
            data = gr.Dataframe(
                label="Clips",
                headers=["text", "start", "stop", "file"],
                datatype=["str", "str", "str", "str"],
                row_count=0,
            )
    # Run the silence detection and fill all three output widgets.
    btn_submit.click(
        calculate_times,
        inputs=[text1, text2, slider1, slider2],
        outputs=[lines, file, data],
    )
    # When the URL textbox loses focus, load the URL into the video player.
    text1.blur(load_video, inputs=[text1], outputs=[video])

app.launch()