Spaces:
Running
Running
File size: 4,658 Bytes
787cd7d 7a6c0da fa786d6 7a6c0da fa786d6 7a6c0da fa786d6 7a6c0da fa786d6 7a6c0da fa786d6 8118032 fa786d6 7a6c0da fe5350d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
import csv
import datetime
import requests
import gradio as gr
import pandas as pd
from io import BytesIO
from pathlib import Path
from urllib.parse import urlparse
from pydub import AudioSegment, silence
def format_seconds(secs):
    """Format a duration in seconds as ``MM:SS.mmm`` (millisecond precision).

    Negative inputs are clamped to zero: the caller subtracts an
    ``ms_before`` padding from clip start times, which can go below 0 for a
    clip at the very beginning of the audio — the original code raised
    OverflowError in that case.
    """
    secs = max(secs, 0)
    t = datetime.datetime(
        year=1, month=1, day=1, hour=0, minute=0
    ) + datetime.timedelta(seconds=secs)
    # %f gives microseconds; drop the last three digits to keep milliseconds.
    return t.strftime("%M:%S.%f")[:-3]
def get_filename_and_extension(url):
    """Split the path component of *url* into its file-name pieces.

    Returns a 3-tuple ``(filename, stem, suffix)``, e.g. for
    ``https://host/a/video.mp4`` -> ``("video.mp4", "video", ".mp4")``.
    Query strings and fragments are ignored via urlparse.
    """
    url_path = Path(urlparse(url).path)
    return url_path.name, url_path.stem, url_path.suffix
def calculate_times(input_url, input_text, ms_before, ms_after):
    """Detect non-silent clips in the audio at *input_url* and pair each one
    with the corresponding line of *input_text*.

    Parameters
    ----------
    input_url : str  — URL of the media file to download and analyse.
    input_text : str — one expected clip text per line.
    ms_before / ms_after : int — padding (milliseconds) subtracted from each
        clip start / added to each clip stop.

    Returns
    -------
    (text, tsv_path_or_None, dataframe) — the tab-separated clip listing, the
    path of the written ``clips.tsv`` (``None`` on mismatch), and a DataFrame
    with columns text/start/stop/file.
    """
    columns = ["text", "start", "stop", "file"]
    _, _, file_extension = get_filename_and_extension(input_url)
    # pydub wants the bare format name ("mp4"), not the dotted suffix (".mp4").
    file_extension = file_extension.lstrip(".")
    res = requests.get(input_url)
    # Fail fast on HTTP errors — otherwise an HTML error page would be fed to
    # pydub and produce a confusing decode failure.
    res.raise_for_status()
    audio = AudioSegment.from_file(BytesIO(res.content), file_extension)
    # A pause of >= 1.25 s below -80 dBFS separates two clips.
    non_silent_parts = silence.detect_nonsilent(
        audio, min_silence_len=1250, silence_thresh=-80
    )
    segments = [
        (
            format_seconds((start - ms_before) / 1000),
            format_seconds((stop + ms_after) / 1000),
        )
        for start, stop in non_silent_parts
    ]
    lines = input_text.splitlines()
    if len(lines) != len(segments):
        msg = f"DETECTED CLIPS AND INPUT LINES DO NOT MATCH!\n\nYou are expecting {len(lines)} clips BUT {len(segments)} segments have been found in the video file.\n\nPlease, review the list of clips or transcribe the audio to check the clips.\n\nUSEFUL FREE TOOLS:\n\nTranscribe audio to VTT file\nhttps://replicate.com/openai/whisper\n\nVTT file viewer\nhttps://www.happyscribe.com/subtitle-tools/online-subtitle-editor/free"
        # Single blank row so the UI DataFrame renders an empty grid.
        df = pd.DataFrame([["", "", "", ""]], columns=columns)
        return msg, None, df
    # Build all rows first, then create the DataFrame once — appending with
    # df.loc[len(df.index)] re-allocates on every iteration.
    rows = []
    out_lines = []
    for raw_line, (start, stop) in zip(lines, segments):
        line = raw_line.rstrip()
        out_lines.append(f"{line}\t{start}\t{stop}\t{input_url}")
        rows.append([line, start, stop, input_url])
    df = pd.DataFrame(rows, columns=columns)
    df.to_csv(
        "clips.tsv",
        sep="\t",
        encoding="utf-8",
        index=False,
        header=False,
        quoting=csv.QUOTE_NONE,
    )
    return "\n".join(out_lines), "clips.tsv", df
def load_video(input_url):
    """Return the URL to feed the video player, or None when the box is empty."""
    return input_url if input_url else None
# Stylesheet for required input boxes. CSS declarations are separated by
# semicolons — the original used a comma, which made the rule invalid and
# silently dropped by the browser.
css = """
.required {background-color: #FFCCCB !important; font-size: 24px !important}
"""
# Gradio UI: a two-column layout — inputs and controls on the left,
# results (downloadable TSV, copyable text, DataFrame grid) on the right.
with gr.Blocks(title="Start and stop times", css=css) as app:
    gr.Markdown(
        """# Start and stop times generator
Please, fill the Video URL and Clip texts textboxes and click the Run button"""
    )
    with gr.Row():
        with gr.Column(scale=3):
            # Required inputs get the .required CSS class for highlighting.
            text1 = gr.Textbox(
                lines=1,
                placeholder="Video URL...",
                label="Video URL",
                elem_classes=["required"],
            )
            text2 = gr.Textbox(
                lines=5,
                max_lines=10,
                placeholder="List of clip texts...",
                label="Clip texts",
                elem_classes=["required"],
            )
            # Padding sliders feed ms_before / ms_after of calculate_times.
            slider1 = gr.Slider(
                minimum=0,
                maximum=1000,
                step=50,
                value=0,
                label="Milliseconds BEFORE each clip",
            )
            slider2 = gr.Slider(
                minimum=0,
                maximum=1000,
                step=50,
                value=500,
                label="Milliseconds AFTER each clip",
            )
            btn_submit = gr.Button(value="Run", variant="primary", size="sm")
            # Preview player, filled by load_video when the URL box loses focus.
            video = gr.Video(
                format="mp4", label="Video file", show_label=True, interactive=False
            )
        with gr.Column(scale=5):
            # Output widgets — all read-only; populated by calculate_times.
            file = gr.File(
                label="Clips", show_label=True, file_count=1, interactive=False
            )
            lines = gr.Textbox(
                lines=10, label="Clips", interactive=False, show_copy_button=True
            )
            data = gr.Dataframe(
                label="Clips",
                headers=["text", "start", "stop", "file"],
                datatype=["str", "str", "str", "str"],
                row_count=0,
            )
    # Run button -> clip detection; outputs go to the three result widgets.
    btn_submit.click(
        calculate_times,
        inputs=[text1, text2, slider1, slider2],
        outputs=[lines, file, data],
    )
    # Leaving the URL textbox loads the video into the preview player.
    text1.blur(load_video, inputs=[text1], outputs=[video])
app.launch()
|