# clip-detector / app.py
# Hugging Face Space by sergiomar73 - commit 8118032 ("Remove examples"), 4.66 kB.
import csv
import datetime
import requests
import gradio as gr
import pandas as pd
from io import BytesIO
from pathlib import Path
from urllib.parse import urlparse
from pydub import AudioSegment, silence
def format_seconds(secs):
    """Format an offset in seconds as an ``MM:SS.mmm`` timestamp.

    Args:
        secs: Offset in seconds (int or float). Negative values are clamped
            to zero, since a media timestamp cannot precede the start.

    Returns:
        A string such as ``"01:15.250"``. Minutes are not wrapped at 60, so
        offsets of one hour or more keep counting (``3661`` -> ``"61:01.000"``)
        instead of silently dropping the hours. Milliseconds are truncated,
        not rounded.
    """
    # timedelta normalises the value to whole microseconds, which avoids
    # float artefacts such as 1.001 * 1000 == 1000.9999999999999.
    elapsed = max(datetime.timedelta(seconds=secs), datetime.timedelta(0))
    whole_seconds = elapsed.days * 86400 + elapsed.seconds
    minutes, seconds = divmod(whole_seconds, 60)
    millis = elapsed.microseconds // 1000
    return f"{minutes:02d}:{seconds:02d}.{millis:03d}"
def get_filename_and_extension(url):
    """Split the last path component of a URL into name, stem and suffix.

    Only the path part of the URL is considered; query string and fragment
    are ignored.

    Args:
        url: The URL to inspect.

    Returns:
        A ``(filename, stem, suffix)`` tuple, e.g.
        ``("clip.mp4", "clip", ".mp4")``. The suffix keeps its leading dot
        and is empty when the file name has no extension.
    """
    leaf = Path(urlparse(url).path).name
    leaf_path = Path(leaf)
    return leaf, leaf_path.stem, leaf_path.suffix
def calculate_times(input_url, input_text, ms_before, ms_after):
    """Download a media file, detect its non-silent clips and pair them with text.

    Args:
        input_url: URL of the audio/video file. The suffix of its path is
            handed to pydub as the container format.
        input_text: Clip texts, one per line, in playback order.
        ms_before: Milliseconds of padding subtracted from each clip start.
        ms_after: Milliseconds of padding added to each clip stop.

    Returns:
        A ``(text, file_path, dataframe)`` tuple. When the number of text
        lines equals the number of detected clips, ``text`` holds one
        tab-separated ``line, start, stop, url`` row per clip, ``file_path``
        is ``"clips.tsv"`` (written with the same rows) and ``dataframe``
        contains the rows. Otherwise ``text`` is an explanatory message,
        ``file_path`` is ``None`` and ``dataframe`` holds a single blank row.

    Raises:
        requests.RequestException: If the download fails, times out or the
            server answers with an HTTP error status.
    """
    columns = ["text", "start", "stop", "file"]

    _, _, file_extension = get_filename_and_extension(input_url)
    # pydub expects the format name without the leading dot.
    audio_format = file_extension.lstrip(".")

    # Without a timeout a stalled server would block the worker forever, and
    # without the status check an HTML error page would be fed to the decoder.
    response = requests.get(input_url, timeout=60)
    response.raise_for_status()
    audio = AudioSegment.from_file(BytesIO(response.content), audio_format)

    non_silent_parts = silence.detect_nonsilent(
        audio, min_silence_len=1250, silence_thresh=-80
    )

    # Keep the padded boundaries inside the media: a clip that starts right at
    # the beginning must not get a negative start, and the last clip must not
    # stop after the end of the file.
    duration_ms = len(audio)
    segments = [
        (
            format_seconds(max(start - ms_before, 0) / 1000),
            format_seconds(min(stop + ms_after, duration_ms) / 1000),
        )
        for start, stop in non_silent_parts
    ]

    lines = input_text.splitlines()
    if len(lines) != len(segments):
        msg = (
            "DETECTED CLIPS AND INPUT LINES DO NOT MATCH!\n\n"
            f"You are expecting {len(lines)} clips BUT {len(segments)} segments "
            "have been found in the video file.\n\n"
            "Please, review the list of clips or transcribe the audio to check "
            "the clips.\n\n"
            "USEFUL FREE TOOLS:\n\n"
            "Transcribe audio to VTT file\n"
            "https://replicate.com/openai/whisper\n\n"
            "VTT file viewer\n"
            "https://www.happyscribe.com/subtitle-tools/online-subtitle-editor/free"
        )
        # A single blank row keeps the output table visibly empty.
        return msg, None, pd.DataFrame([["", "", "", ""]], columns=columns)

    rows = [
        [line.rstrip(), start, stop, input_url]
        for line, (start, stop) in zip(lines, segments)
    ]
    df = pd.DataFrame(rows, columns=columns)
    # NOTE(review): the output path is fixed and relative to the working
    # directory, so simultaneous requests would overwrite each other's file.
    df.to_csv(
        "clips.tsv",
        sep="\t",
        encoding="utf-8",
        index=False,
        header=False,
        quoting=csv.QUOTE_NONE,
    )
    listing = "\n".join("\t".join(row) for row in rows)
    return listing, "clips.tsv", df
def load_video(input_url):
    """Return the URL to show in the video player, or ``None`` when it is empty."""
    return input_url or None
# Custom stylesheet for the Blocks app: highlights the mandatory inputs.
# CSS declarations are separated by ";" - a "," between them makes the whole
# rule invalid, so neither property would be applied.
css = """
.required {background-color: #FFCCCB !important; font-size: 24px !important}
"""
# User interface: inputs on the left, generated clip list on the right.
# NOTE(review): the nesting below was reconstructed from a flattened listing;
# confirm the column each component belongs to against the deployed layout.
with gr.Blocks(title="Start and stop times", css=css) as app:
    gr.Markdown(
        """# Start and stop times generator
Please, fill the Video URL and Clip texts textboxes and click the Run button"""
    )
    with gr.Row():
        # Left column: required inputs, padding sliders and video preview.
        with gr.Column(scale=3):
            text1 = gr.Textbox(
                lines=1,
                placeholder="Video URL...",
                label="Video URL",
                elem_classes=["required"],
            )
            text2 = gr.Textbox(
                lines=5,
                max_lines=10,
                placeholder="List of clip texts...",
                label="Clip texts",
                elem_classes=["required"],
            )
            slider1 = gr.Slider(
                minimum=0,
                maximum=1000,
                step=50,
                value=0,
                label="Milliseconds BEFORE each clip",
            )
            slider2 = gr.Slider(
                minimum=0,
                maximum=1000,
                step=50,
                value=500,
                label="Milliseconds AFTER each clip",
            )
            btn_submit = gr.Button(value="Run", variant="primary", size="sm")
            video = gr.Video(
                format="mp4", label="Video file", show_label=True, interactive=False
            )
        # Right column: the same result as a download, as text and as a table.
        with gr.Column(scale=5):
            # file_count takes "single", "multiple" or "directory"; the
            # integer 1 is not a documented value.
            file = gr.File(
                label="Clips",
                show_label=True,
                file_count="single",
                interactive=False,
            )
            lines = gr.Textbox(
                lines=10, label="Clips", interactive=False, show_copy_button=True
            )
            data = gr.Dataframe(
                label="Clips",
                headers=["text", "start", "stop", "file"],
                datatype=["str", "str", "str", "str"],
                row_count=0,
            )
    # Run the detection when the button is clicked.
    btn_submit.click(
        calculate_times,
        inputs=[text1, text2, slider1, slider2],
        outputs=[lines, file, data],
    )
    # Load the video preview as soon as the URL box loses focus.
    text1.blur(load_video, inputs=[text1], outputs=[video])
app.launch()