Spaces:

gnosticdev
/

audio-a-video

Running

App Files Files Community

audio-a-video / app.py

gnosticdev

Create app.py

9857383 verified about 1 month ago

raw

history blame

5.13 kB

	import gradio as gr
	import moviepy.editor as mp
	import numpy as np
	import librosa
	import librosa.display
	import matplotlib.pyplot as plt
	import io
	import os

	# Función principal para generar el video
	def audio_to_video(audio_file, image_file, effect_type="waveform"):
	    """Render an MP4 showing a still image with an audio-synchronised overlay.

	    The audio is decoded with librosa, its absolute amplitude is scaled to
	    half the image height, and for every video frame a 0.2 s window of that
	    envelope (centred on the frame time) is drawn in red on top of the image.
	    The original audio file is attached as the soundtrack.

	    Args:
	        audio_file: Path to the audio file (WAV or MP3).
	        image_file: Path to the background image (any format MoviePy reads).
	        effect_type: Name of the visual effect. Only "waveform" is implemented.

	    Returns:
	        The path of the written video ("output.mp4") on success. On failure a
	        string starting with "Error: " is returned instead of raising.

	    NOTE(review): the Gradio interface in this file declares a video output,
	    so a returned error string will presumably be treated as a file path by
	    Gradio; raising ``gr.Error`` may be the better contract - confirm before
	    changing, since callers may rely on the string.
	    """
	    fps = 24
	    dpi = 100
	    output_video_path = "output.mp4"

	    # Validate the cheap things first so a bad request never pays for decoding.
	    if effect_type != "waveform":
	        return "Error: Efecto visual no soportado."
	    if not audio_file or not image_file:
	        return "Error: Debes subir un archivo de audio y una imagen."

	    audio_clip = None
	    try:
	        # Use the Agg canvas directly instead of pyplot: no global figure
	        # registry to leak into, and no GUI backend involved.
	        from matplotlib.backends.backend_agg import FigureCanvasAgg
	        from matplotlib.figure import Figure

	        # 1. Decode the audio.
	        y, sr = librosa.load(audio_file)
	        if y.size == 0:
	            return "Error: El archivo de audio está vacío."
	        duration = librosa.get_duration(y=y, sr=sr)

	        # 2. Load the background image once; every frame is drawn on a copy.
	        img_clip = mp.ImageClip(image_file).set_duration(duration)
	        width, height = img_clip.size
	        background = np.asarray(img_clip.get_frame(0))
	        if background.ndim == 2:
	            # Greyscale image: replicate the single channel into RGB.
	            background = np.stack([background] * 3, axis=-1)
	        background = background[..., :3]

	        # 3. Amplitude envelope scaled to half the image height.
	        envelope = np.abs(y)
	        peak = envelope.max()
	        if peak > 0:
	            # Silent audio would otherwise divide by zero and produce NaNs.
	            envelope = envelope / peak
	        envelope = envelope * height / 2

	        # One transparent figure is reused for all frames; only the line
	        # data changes. The +0.5 keeps int(inches * dpi) from truncating
	        # below the requested pixel size due to float rounding.
	        fig = Figure(figsize=((width + 0.5) / dpi, (height + 0.5) / dpi), dpi=dpi)
	        canvas = FigureCanvasAgg(fig)
	        fig.patch.set_alpha(0.0)
	        ax = fig.add_subplot(111)
	        ax.patch.set_alpha(0.0)
	        ax.axis("off")
	        (upper_line,) = ax.plot([], [], color="red")
	        (lower_line,) = ax.plot([], [], color="red")
	        ax.set_xlim(0, duration)
	        ax.set_ylim(-height / 2, height / 2)

	        half_window = sr // 10  # samples in 0.1 s on each side of the frame time

	        def make_frame(t):
	            """Return the uint8 RGB frame for time ``t`` (seconds)."""
	            center = int(t * sr)
	            start = max(0, center - half_window)
	            stop = min(len(envelope), center + half_window)
	            window = envelope[start:stop]
	            # Real sample times, so a window clipped at the start/end of the
	            # track is not stretched across the full 0.2 s span.
	            times = np.arange(start, stop) / sr
	            upper_line.set_data(times, window - height / 4)
	            lower_line.set_data(times, height / 4 - window)
	            canvas.draw()
	            return _blend_overlay(background, np.asarray(canvas.buffer_rgba()))

	        final_clip = mp.VideoClip(make_frame, duration=duration).set_fps(fps)

	        # 4. Attach the original audio as the soundtrack.
	        audio_clip = mp.AudioFileClip(audio_file)
	        final_clip = final_clip.set_audio(audio_clip)

	        # 5. Encode.
	        final_clip.write_videofile(
	            output_video_path, fps=fps, codec="libx264", audio_codec="aac"
	        )
	        return output_video_path

	    except Exception as e:
	        # Top-level boundary for the UI callback: report instead of raising.
	        return f"Error: {str(e)}"
	    finally:
	        if audio_clip is not None:
	            # Releases the reader held by the audio clip.
	            audio_clip.close()


	def _blend_overlay(background, overlay_rgba):
	    """Alpha-blend an RGBA overlay onto an RGB background, returning uint8 RGB.

	    The overlay is treated as straight (non-premultiplied) alpha. Only the
	    region common to both arrays is blended, so a one-pixel size mismatch
	    between the rendered canvas and the image cannot raise.
	    """
	    frame = background.astype(np.float32)
	    rows = min(frame.shape[0], overlay_rgba.shape[0])
	    cols = min(frame.shape[1], overlay_rgba.shape[1])
	    overlay = overlay_rgba[:rows, :cols].astype(np.float32)
	    alpha = overlay[..., 3:4] / 255.0
	    frame[:rows, :cols] = frame[:rows, :cols] * (1.0 - alpha) + overlay[..., :3] * alpha
	    return np.clip(np.rint(frame), 0, 255).astype(np.uint8)


	# ----------------------------------
	# Gradio Interface
	# ----------------------------------

	# ----------------------------------
	# Example files (optional). They are created BEFORE the interface is built
	# because the interface is given their paths as examples, and Gradio may
	# read example files while constructing the interface.
	# ----------------------------------
	def _write_example_audio(path, sample_rate=22050, seconds=5, frequency=440.0):
	    """Write a mono 16-bit PCM WAV containing a sine tone (A4 by default)."""
	    # The stdlib writer is used because librosa.output.write_wav no longer
	    # exists in current librosa releases.
	    import wave

	    t = np.arange(int(seconds * sample_rate)) / sample_rate
	    samples = 0.5 * np.sin(2 * np.pi * frequency * t)
	    pcm = (samples * 32767).astype("<i2")  # little-endian int16, as WAV expects
	    with wave.open(path, "wb") as wav_file:
	        wav_file.setnchannels(1)
	        wav_file.setsampwidth(2)
	        wav_file.setframerate(sample_rate)
	        wav_file.writeframes(pcm.tobytes())


	def _write_example_image(path):
	    """Write a simple placeholder image with centred text."""
	    fig, ax = plt.subplots(figsize=(6, 4))
	    ax.text(0.5, 0.5, "Placeholder Image", ha="center", va="center")
	    ax.axis("off")
	    fig.savefig(path)
	    plt.close(fig)  # release the figure held by pyplot


	if not os.path.exists("audio_example.wav"):
	    _write_example_audio("audio_example.wav")

	if not os.path.exists("image_example.jpg"):
	    _write_example_image("image_example.jpg")


	# ----------------------------------
	# Gradio interface
	# ----------------------------------
	iface = gr.Interface(
	    fn=audio_to_video,
	    inputs=[
	        gr.Audio(source="upload", type="filepath", label="Subir Archivo de Audio (WAV o MP3)"),
	        gr.Image(source="upload", type="filepath", label="Subir Imagen"),
	        gr.Radio(["waveform"], value="waveform", label="Tipo de Efecto Visual (Waveform es el único soportado por ahora)"),
	    ],
	    outputs="video",
	    title="Audio to Video Generator",
	    description="Sube un archivo de audio y una imagen para crear un video con un efecto visual sincronizado con la música. Por ahora solo funciona el efecto 'waveform'.",
	    # Remove this line (and the example-file section above) to ship without examples.
	    examples=[["audio_example.wav", "image_example.jpg", "waveform"]],
	)


	iface.launch()