Creador-de-videos-con-imagen

Running

App Files Files Community

Creador-de-videos-con-imagen / app.py

gnosticdev

Update app.py

a241f1d verified 5 months ago

raw

history blame contribute delete

6.88 kB

	import gradio as gr
	import edge_tts
	import asyncio
	import tempfile
	import os
	import math
	from pydub import AudioSegment
	import subprocess

	# Función para obtener voces disponibles
	async def get_voices():
	voices = await edge_tts.list_voices()
	return {f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName'] for v in voices}

	# Conversión de texto a voz
	async def text_to_speech(text, voice, rate, pitch):
	if not text.strip() or not voice:
	return (None, "Please enter text and select a voice") if not text else (None, "Please select a voice")

	try:
	communicate = edge_tts.Communicate(
	text,
	voice.split(" - ")[0],
	rate=f"{rate:+d}%",
	pitch=f"{pitch:+d}Hz"
	)

	with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
	await communicate.save(tmp_file.name)
	return tmp_file.name, None
	except Exception as e:
	return None, f"Speech generation failed: {str(e)}"

	# Agregar música de fondo (ahora elimina el audio original)
	def add_background_music(speech_path, bg_music_path):
	speech = AudioSegment.from_file(speech_path)
	background = AudioSegment.from_file(bg_music_path) - 16 # 15% volume

	if len(background) < len(speech) + 3000:
	background = background * math.ceil((len(speech)+3000)/len(background))

	combined = speech.overlay(background[:len(speech)])
	fade_out = background[len(speech):len(speech)+3000].fade_out(3000)
	final_audio = combined + fade_out

	with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
	final_audio.export(tmp_file.name, format="mp3")
	# Eliminar audio original
	if os.path.exists(speech_path):
	os.remove(speech_path)
	return tmp_file.name

	# Procesar múltiples videos (ahora elimina archivos temporales)
	def process_videos(audio_path, video_files):
	temp_files = []
	try:
	audio_duration = AudioSegment.from_file(audio_path).duration_seconds

	# Concatenar videos
	with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as concat_video:
	temp_files.append(concat_video.name)

	with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as list_file:
	temp_files.append(list_file.name)
	list_file.write("\n".join([f"file '{v.name}'" for v in video_files]))
	list_file.close()

	subprocess.run([
	"ffmpeg", "-y",
	"-f", "concat",
	"-safe", "0",
	"-i", list_file.name,
	"-c", "copy",
	concat_video.name
	], check=True)

	# Crear video final
	with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as final_video:
	subprocess.run([
	"ffmpeg", "-y",
	"-stream_loop", "-1",
	"-i", concat_video.name,
	"-i", audio_path,
	"-t", str(audio_duration + 3),
	"-c:v", "libx264",
	"-c:a", "aac",
	"-vf", "fade=t=out:st={}:d=3".format(audio_duration),
	"-af", "afade=t=out:st={}:d=3".format(audio_duration),
	"-shortest",
	final_video.name
	], check=True)

	return final_video.name

	finally:
	# Eliminar archivos temporales
	for f in temp_files:
	if os.path.exists(f):
	os.remove(f)

	# Función principal (ahora elimina videos originales)
	async def tts_interface(text, voice, rate, pitch, bg_music, video_files):
	temp_audio = None
	try:
	# Generar audio principal
	temp_audio, warning = await text_to_speech(text, voice, rate, pitch)
	if warning:
	return None, None, gr.Warning(warning)

	# Agregar música de fondo
	if bg_music:
	temp_audio = add_background_music(temp_audio, bg_music)

	# Procesar videos
	video_path = None
	if video_files:
	video_path = process_videos(temp_audio, video_files)
	# Eliminar videos originales subidos
	for video in video_files:
	if hasattr(video, 'name') and os.path.exists(video.name):
	os.remove(video.name)

	return temp_audio, video_path, None

	except Exception as e:
	return None, None, gr.Warning(f"Processing error: {str(e)}")
	finally:
	# Eliminar audio temporal si existe y no es la salida final
	if temp_audio and os.path.exists(temp_audio):
	try:
	if video_path and temp_audio != video_path:
	os.remove(temp_audio)
	except: # Evitar errores si el archivo ya fue eliminado
	pass

	# Crear interfaz (sin cambios)
	async def create_demo():
	voices = await get_voices()

	demo = gr.Interface(
	fn=tts_interface,
	inputs=[
	gr.Textbox(label="Input Text", lines=5, placeholder="Enter your text here..."),
	gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice"),
	gr.Slider(-50, 50, 0, label="Speech Rate (%)"),
	gr.Slider(-20, 20, 0, label="Pitch (Hz)"),
	gr.Audio(label="Background Music", type="filepath"),
	gr.File(label="Upload Videos", file_types=[".mp4", ".mov"], file_count="multiple")
	],
	outputs=[
	gr.Audio(label="Generated Audio", type="filepath"),
	gr.Video(label="Final Video"),
	gr.Markdown(visible=False)
	],
	title="Multi-Video TTS con Bucle",
	description="""
	Este script permite crear videos personalizados combinando texto, audio y múltiples clips de video.
	Convierte texto en voz usando tecnología avanzada de síntesis de voz (Text-to-Speech),
	opcionalmente añade música de fondo para enriquecer el audio generado y procesa varios videos subidos por el usuario
	para reproducirlos en secuencia y en bucle infinito.
	El resultado final es un video que sincroniza el audio con la concatenación de los clips,
	asegurando una transición suave entre ellos y un fade-out al final de cada ciclo. Además, el script está diseñado para
	limpiar automáticamente los archivos temporales y los videos originales subidos, evitando acumulación innecesaria en el servidor.
	Es ideal para generar contenido dinámico como videos motivacionales, presentaciones automáticas o material promocional.
	""",
	css="#component-0 {max-width: 800px}"
	)
	return demo

	async def main():
	demo = await create_demo()
	demo.queue()
	demo.launch()

	if __name__ == "__main__":
	asyncio.run(main())