Update app.py
00451f0
import moviepy.editor as mp
import librosa
import numpy as np
import gradio as gr
def buffer_n_merge(intervals, buffer=0.1):
    """Pad each [start, end] interval by `buffer` seconds on both sides and merge overlaps."""
    if not intervals:
        return []
    new_intervals = [[intervals[0][0] - buffer, intervals[0][1] + buffer]]
    for start, end in intervals[1:]:
        start -= buffer
        end += buffer
        if new_intervals[-1][-1] >= start:
            # Padded interval overlaps the previous one: extend it
            new_intervals[-1][-1] = end
        else:
            new_intervals.append([start, end])
    return new_intervals
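# Example of the merge behaviour (hypothetical values):
#   buffer_n_merge([[1.0, 2.0], [2.05, 3.0]], buffer=0.1)
# pads the intervals to [0.9, 2.1] and [1.95, 3.1]; they now overlap,
# so they collapse into the single interval [[0.9, 3.1]].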
def download_and_process_video(in_f, threshold_db, buffer_sec):
    vidpath = in_f.name
    # Load the video
    video = mp.VideoFileClip(vidpath)
    # Extract the audio as a (samples, channels) array at 22000 Hz
    audio = video.audio.to_soundarray(fps=22000)
    # Use librosa to find non-silent intervals on the first channel (librosa works in samples)
    non_silent_intervals = librosa.effects.split(audio[:, 0], top_db=threshold_db)
    # Convert the intervals from samples to seconds
    non_silent_intervals_sec = np.array(non_silent_intervals) / 22000
    # Add a buffer around each interval and merge any that now overlap
    non_silent_intervals_sec = buffer_n_merge(non_silent_intervals_sec.tolist(), buffer=buffer_sec)
    # Cut the video at the non-silent intervals, clamping each clip to the video's bounds
    clips = [video.subclip(max(0, start_time), min(end_time, video.duration))
             for start_time, end_time in non_silent_intervals_sec]
    # Concatenate the clips and write the result
    output_file = 'my_concatenation.mp4'
    final_clip = mp.concatenate_videoclips(clips)
    final_clip.write_videofile(output_file, codec='libx264', audio_codec='aac',
                               temp_audiofile='temp-audio.m4a', remove_temp=True)
    return output_file
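# Quick local sanity check outside the Gradio UI (hypothetical file name): the
# function only reads `.name` from its first argument, so any object with that
# attribute can stand in for the uploaded-file wrapper, e.g.:
#   class _FakeUpload: name = 'sample_input.mp4'
#   download_and_process_video(_FakeUpload(), threshold_db=30, buffer_sec=0.1)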
iface = gr.Interface(
    fn=download_and_process_video,
    inputs=[
        gr.inputs.File(label="Video File (.mp4 only)", file_count='single', type='file'),
        gr.inputs.Slider(minimum=1, maximum=70, step=1, default=30, label="Threshold (dB)"),
        gr.inputs.Slider(minimum=0, maximum=2, step=0.01, default=0.1, label="Buffer (sec)"),
    ],
    outputs=gr.outputs.Video(label="Processed Video"),
    title="Video Silence Remover",
)
iface.launch()