# Hugging Face Space: jonathang / YoutubeSmartSpeed
# Space status when this file was captured: Runtime error
# File size: 2,162 Bytes

import moviepy.editor as mp
import librosa
import numpy as np
import gradio as gr
import subprocess


def buffer_n_merge(intervals, buffer=0.1):
    """Pad every interval by ``buffer`` on both sides and merge overlaps.

    Args:
        intervals: Sequence of ``[start, end]`` pairs, expected to be ordered
            by start. The input (including its inner pairs) is not modified.
        buffer: Amount subtracted from each start and added to each end.

    Returns:
        A new list of ``[start, end]`` lists in which padded intervals that
        touch or overlap have been fused. Returns ``[]`` for empty input.
        Starts may become negative after padding; clamping to a valid range
        is left to the caller.
    """
    if not intervals:
        return []

    merged = []
    for start, end in intervals:
        # Unpacking yields fresh local values, so the caller's pairs are
        # never written to.
        start -= buffer
        end += buffer
        if merged and merged[-1][1] >= start:
            # Keep the furthest end seen so far: an interval nested inside
            # the previous one must not shrink the merged span.
            merged[-1][1] = max(merged[-1][1], end)
        else:
            merged.append([start, end])
    return merged


def download_and_process_video(in_f, threshold_db, buffer_sec):
    """Cut the silent stretches out of an uploaded video.

    Args:
        in_f: Uploaded file object; only its ``name`` attribute (the path on
            disk) is used.
        threshold_db: Passed to ``librosa.effects.split`` as ``top_db``.
        buffer_sec: Seconds of padding kept around every non-silent interval
            before neighbouring intervals are merged.

    Returns:
        Path of the written output file (``'my_concatenation.mp4'``).

    Raises:
        ValueError: If the video has no audio track, or if no non-silent
            section remains after thresholding.
    """
    sample_rate = 22000
    output_file = 'my_concatenation.mp4'

    # load the video
    video = mp.VideoFileClip(in_f.name)
    try:
        if video.audio is None:
            raise ValueError(
                "The video has no audio track, so silence cannot be detected."
            )

        # extract the audio samples; only the first channel is analysed below
        audio = video.audio.to_soundarray(fps=sample_rate)

        # use librosa to get non-silent intervals
        non_silent_intervals = librosa.effects.split(audio[:, 0], top_db=threshold_db)
        # convert from samples to seconds, as librosa works with samples not seconds
        non_silent_intervals_sec = np.array(non_silent_intervals) / sample_rate

        # Add buffer and merge intervals
        non_silent_intervals_sec = buffer_n_merge(
            non_silent_intervals_sec.tolist(), buffer=buffer_sec
        )

        # cut the video using the non-silent intervals, clamped to the clip's
        # own time range; intervals that collapse after clamping are skipped
        clips = []
        for start_time, end_time in non_silent_intervals_sec:
            start_time = max(0, start_time)
            end_time = min(end_time, video.duration)
            if start_time < end_time:
                clips.append(video.subclip(start_time, end_time))

        if not clips:
            raise ValueError(
                "No non-silent section was found at the chosen threshold."
            )

        final_clip = mp.concatenate_videoclips(clips)
        final_clip.write_videofile(output_file, codec='libx264', audio_codec='aac', temp_audiofile='temp-audio.m4a', remove_temp=True)
    finally:
        # Release the source clip's readers even when processing fails.
        video.close()

    return output_file


# Input widgets, in the positional order expected by download_and_process_video.
video_file_input = gr.inputs.File(
    label="Video File (.mp4 only)",
    file_count='single',
    type='file',
)
threshold_slider = gr.inputs.Slider(
    minimum=1,
    maximum=70,
    step=1,
    default=30,
    label="Threshold (db)",
)
buffer_slider = gr.inputs.Slider(
    minimum=0,
    maximum=2,
    step=0.01,
    default=0.1,
    label="Buffer (sec)",
)

# Output widget that shows the file path returned by the processing function.
processed_video_output = gr.outputs.Video(label="Processed Video")

# Assemble the web UI and start serving it.
iface = gr.Interface(
    fn=download_and_process_video,
    inputs=[video_file_input, threshold_slider, buffer_slider],
    outputs=processed_video_output,
    title="Video Silence Remover",
)

iface.launch()