import os
import tempfile

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
from moviepy.editor import AudioFileClip, ImageClip, CompositeVideoClip

# Sample rate used when extracting audio samples for the waveform plot.
WAVEFORM_FPS = 22050
# Height in pixels of the generated video; the width follows the image aspect.
VIDEO_HEIGHT = 720
# Gap in pixels between the waveform overlay and the bottom edge of the frame.
WAVEFORM_BOTTOM_MARGIN = 20


def _normalize_waveform(samples):
    """Collapse audio samples to mono and rescale them linearly onto [-1, 1].

    Args:
        samples: 1-D array of samples, or 2-D array whose second axis holds
            the channels (channels are averaged).

    Returns:
        1-D float array with the minimum mapped to -1 and the maximum to 1.
        A constant signal maps to all zeros.

    Raises:
        ValueError: if there are no samples at all.
    """
    samples = np.asarray(samples, dtype=float)
    if samples.ndim > 1:
        samples = samples.mean(axis=1)
    if samples.size == 0:
        raise ValueError("audio contains no samples")

    low, high = samples.min(), samples.max()
    if low == high:
        # np.interp needs a strictly increasing input range; a flat signal
        # is simply drawn as a centre line.
        return np.zeros_like(samples)
    return np.interp(samples, (low, high), (-1, 1))


def _save_waveform_plot(waveform, path):
    """Render *waveform* as a borderless blue line plot and save it to *path*."""
    fig, ax = plt.subplots(figsize=(10, 2))
    try:
        ax.plot(waveform, color="blue")
        ax.axis("off")
        fig.savefig(path, bbox_inches="tight", pad_inches=0)
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)


def create_waveform_video(image, audio_path):
    """Build an MP4 showing *image* with a waveform overlay and the audio track.

    Args:
        image: PIL image used as the static video frame.
        audio_path: Path of the uploaded audio file.

    Returns:
        Path of the generated ``.mp4`` file.

    Raises:
        gr.Error: if an input is missing or the video cannot be produced.
            Raising (instead of returning a message) keeps Gradio from
            treating the error text as a video file path.
    """
    if audio_path is None:
        raise gr.Error("Error: No audio file provided.")
    if image is None:
        raise gr.Error("Error: No image provided.")

    # mkstemp creates the file atomically, unlike the deprecated mktemp.
    # The video has to outlive this function so Gradio can serve it.
    fd, video_path = tempfile.mkstemp(suffix=".mp4")
    os.close(fd)

    audio_clip = None
    try:
        # Intermediate images live in a directory that is removed on exit,
        # whether or not rendering succeeds.
        with tempfile.TemporaryDirectory() as work_dir:
            image_path = os.path.join(work_dir, "image.png")
            waveform_path = os.path.join(work_dir, "waveform.png")
            image.save(image_path)

            audio_clip = AudioFileClip(audio_path)
            duration = audio_clip.duration

            waveform = _normalize_waveform(
                audio_clip.to_soundarray(fps=WAVEFORM_FPS)
            )
            _save_waveform_plot(waveform, waveform_path)

            # H.264 with 4:2:0 chroma subsampling needs even frame
            # dimensions, so round the scaled width down to an even number.
            target_width = round(image.width * VIDEO_HEIGHT / image.height)
            target_width = max(2, target_width - target_width % 2)

            img_clip = (
                ImageClip(image_path)
                .set_duration(duration)
                .resize(newsize=(target_width, VIDEO_HEIGHT))
            )
            waveform_clip = (
                ImageClip(waveform_path)
                .set_duration(duration)
                .resize(width=img_clip.w)
            )
            # Centre the waveform horizontally, just above the bottom edge.
            waveform_top = img_clip.h - waveform_clip.h - WAVEFORM_BOTTOM_MARGIN
            waveform_clip = waveform_clip.set_position(("center", waveform_top))

            final_clip = CompositeVideoClip(
                [img_clip, waveform_clip.set_opacity(0.7)]
            ).set_audio(audio_clip)
            final_clip.write_videofile(
                video_path, codec="libx264", fps=24, audio_codec="aac"
            )
    except Exception as e:
        # Do not leave an empty or partial output file behind.
        if os.path.exists(video_path):
            os.remove(video_path)
        raise gr.Error(f"An error occurred: {e}") from e
    finally:
        if audio_clip is not None:
            audio_clip.close()

    return video_path


iface = gr.Interface(
    fn=create_waveform_video,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Audio(type="filepath", label="Upload Audio"),
    ],
    outputs=gr.Video(label="Generated Video"),
    title="Image + Audio to Video with Waveform Overlay",
)

iface.launch(debug=True)