Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,9 +1,8 @@
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
-
from huggingface_hub import hf_hub_download
|
4 |
-
from torch.nn import Linear, Sequential, Tanh
|
5 |
import soundfile as sf
|
6 |
import edge_tts
|
|
|
7 |
from transformers import GPT2Tokenizer, GPT2LMHeadModel
|
8 |
from keybert import KeyBERT
|
9 |
from moviepy.editor import (
|
@@ -31,72 +30,52 @@ import threading
|
|
31 |
import time
|
32 |
from datetime import datetime, timedelta
|
33 |
|
34 |
-
# ------------------- CÓDIGO DEL MOTOR TOUCANTTS (Integrado) -------------------
|
35 |
-
# Este bloque contiene las funciones y clases extraídas para que el TTS funcione sin archivos externos.
|
36 |
-
|
37 |
-
# --- Contenido de Utility/utils.py ---
|
38 |
-
def float2pcm(sig, dtype='int16'):
|
39 |
-
sig = np.asarray(sig)
|
40 |
-
if sig.dtype.kind != 'f':
|
41 |
-
raise TypeError("'sig' must be a float array")
|
42 |
-
dtype = np.dtype(dtype)
|
43 |
-
if dtype.kind not in 'iu':
|
44 |
-
raise TypeError("'dtype' must be an integer type")
|
45 |
-
i = np.iinfo(dtype)
|
46 |
-
abs_max = 2 ** (i.bits - 1)
|
47 |
-
offset = i.min + abs_max
|
48 |
-
return (sig * abs_max + offset).clip(i.min, i.max).astype(dtype)
|
49 |
-
|
50 |
-
def load_json_from_path(path):
|
51 |
-
with open(path, "r") as f:
|
52 |
-
return json.load(f)
|
53 |
-
|
54 |
-
# --- Contenido de InferenceInterfaces/ToucanTTS.py (simplificado) y ControllableInterface.py ---
|
55 |
-
# Se han omitido y simplificado partes para reducir la complejidad, manteniendo la funcionalidad esencial.
|
56 |
-
# La carga completa del modelo ToucanTTS se hace a través de hf_hub_download, por lo que no es necesario el código completo aquí.
|
57 |
-
# La clase ControllableInterface es una adaptación de la original.
|
58 |
-
|
59 |
-
class EdgeTTSInterface:
|
60 |
-
def __init__(self, voice="es-ES-AlvaroNeural"): # puedes cambiar a "es-ES-ElviraNeural"
|
61 |
-
self.voice = voice
|
62 |
-
|
63 |
-
def read(self, text, language="es", accent=None):
|
64 |
-
tmp_path = tempfile.mktemp(suffix=".wav")
|
65 |
-
|
66 |
-
async def _synth():
|
67 |
-
communicate = edge_tts.Communicate(text, self.voice)
|
68 |
-
await communicate.save(tmp_path)
|
69 |
-
|
70 |
-
asyncio.run(_synth())
|
71 |
-
|
72 |
-
# cargar el wav en numpy
|
73 |
-
wav, sr = sf.read(tmp_path, dtype="float32")
|
74 |
-
return sr, wav
|
75 |
-
|
76 |
-
def get_tts_interface():
|
77 |
-
global tts_interface
|
78 |
-
if tts_interface is None:
|
79 |
-
tts_interface = EdgeTTSInterface()
|
80 |
-
return tts_interface
|
81 |
-
|
82 |
# ------------------- Configuración & Globals -------------------
|
83 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
84 |
logger = logging.getLogger(__name__)
|
85 |
|
86 |
PEXELS_API_KEY = os.getenv("PEXELS_API_KEY")
|
87 |
if not PEXELS_API_KEY:
|
88 |
-
|
89 |
|
90 |
-
tokenizer, gpt2_model, kw_model
|
91 |
RESULTS_DIR = "video_results"
|
92 |
os.makedirs(RESULTS_DIR, exist_ok=True)
|
93 |
TASKS = {}
|
94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
# ------------------- Carga Perezosa de Modelos -------------------
|
96 |
def get_tokenizer():
|
97 |
global tokenizer
|
98 |
if tokenizer is None:
|
99 |
-
logger.info("Cargando tokenizer
|
100 |
tokenizer = GPT2Tokenizer.from_pretrained("datificate/gpt2-small-spanish")
|
101 |
if tokenizer.pad_token is None:
|
102 |
tokenizer.pad_token = tokenizer.eos_token
|
@@ -105,321 +84,497 @@ def get_tokenizer():
|
|
105 |
def get_gpt2_model():
|
106 |
global gpt2_model
|
107 |
if gpt2_model is None:
|
108 |
-
logger.info("Cargando modelo GPT-2
|
109 |
gpt2_model = GPT2LMHeadModel.from_pretrained("datificate/gpt2-small-spanish").eval()
|
110 |
return gpt2_model
|
111 |
|
112 |
def get_kw_model():
|
113 |
global kw_model
|
114 |
if kw_model is None:
|
115 |
-
logger.info("Cargando modelo KeyBERT
|
116 |
kw_model = KeyBERT("paraphrase-multilingual-MiniLM-L12-v2")
|
117 |
return kw_model
|
118 |
|
119 |
-
|
120 |
-
# Esta función ahora es un punto de entrada para el motor ToucanTTS
|
121 |
-
# La carga real se hará dentro de la función de síntesis para manejar el primer uso
|
122 |
-
# De momento, la dejamos como placeholder por si se necesita inicializar algo globalmente
|
123 |
-
pass
|
124 |
-
|
125 |
-
# ------------------- Funciones del Pipeline de Vídeo -------------------
|
126 |
def update_task_progress(task_id, message):
|
127 |
if task_id in TASKS:
|
128 |
TASKS[task_id]['progress_log'] = message
|
129 |
logger.info(f"[{task_id}] {message}")
|
130 |
|
131 |
def gpt2_script(prompt: str) -> str:
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
|
150 |
-
def
|
151 |
-
"""
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
|
158 |
-
def
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
|
164 |
-
def
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
173 |
|
174 |
-
def
|
|
|
175 |
try:
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
|
|
182 |
f.write(chunk)
|
183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
184 |
except Exception as e:
|
185 |
-
logger.error(f"
|
186 |
return None
|
187 |
|
188 |
-
def
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
216 |
)
|
217 |
-
.
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
|
225 |
-
def
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
231 |
|
232 |
-
def
|
233 |
-
|
|
|
|
|
234 |
try:
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
241 |
video_duration = voice_clip.duration
|
|
|
242 |
if video_duration < 1:
|
243 |
-
raise ValueError("El audio generado es demasiado corto
|
244 |
-
|
|
|
|
|
245 |
video_paths = []
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
key=lambda f: f.get("width", 0)
|
255 |
-
)
|
256 |
-
if best_file:
|
257 |
-
path = download_file(best_file.get('link'), tmp_dir)
|
258 |
-
if path:
|
259 |
-
video_paths.append(path)
|
260 |
-
if len(video_paths) >= 8:
|
261 |
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
262 |
if not video_paths:
|
263 |
-
raise RuntimeError("No se
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
270 |
if base_video.duration < video_duration:
|
271 |
-
|
|
|
|
|
|
|
272 |
base_video = base_video.subclip(0, video_duration)
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
277 |
else:
|
278 |
final_audio = voice_clip
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
|
|
|
|
|
|
|
|
285 |
final_video.write_videofile(
|
286 |
output_path,
|
287 |
fps=24,
|
288 |
codec="libx264",
|
289 |
audio_codec="aac",
|
290 |
threads=2,
|
291 |
-
logger=None
|
|
|
292 |
)
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
voice_clip.close()
|
297 |
if 'music_clip' in locals():
|
298 |
music_clip.close()
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
update_task_progress(task_id, "Cargando motor de voz ToucanTTS (primera vez, puede tardar)...")
|
312 |
try:
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
# get_tts_interface()
|
320 |
-
except Exception as e:
|
321 |
-
TASKS[task_id].update({"status": "error", "error": f"Fallo al cargar el motor TTS: {e}"})
|
322 |
-
return
|
323 |
try:
|
324 |
-
|
325 |
-
|
326 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
327 |
except Exception as e:
|
328 |
-
logger.error(f"Error en
|
329 |
-
TASKS[task_id].update({
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
if info.get("result") and os.path.exists(info.get("result")):
|
339 |
-
try:
|
340 |
-
os.remove(info["result"])
|
341 |
-
logger.info(f"[JANITOR] Eliminado: {info['result']}")
|
342 |
-
except Exception as e:
|
343 |
-
logger.error(f"[JANITOR] Error al eliminar {info['result']}: {e}")
|
344 |
-
del TASKS[task_id]
|
345 |
-
|
346 |
-
threading.Thread(target=janitor_thread, daemon=True).start()
|
347 |
-
|
348 |
-
def generate_and_monitor(mode, topic, user_script, music):
|
349 |
content = topic if mode == "Generar Guion con IA" else user_script
|
350 |
-
if not content.strip():
|
351 |
-
yield "Por favor, ingresa un tema o guion.", None, None
|
352 |
return
|
|
|
|
|
353 |
task_id = uuid.uuid4().hex[:8]
|
354 |
TASKS[task_id] = {
|
355 |
"status": "processing",
|
356 |
-
"progress_log": "Iniciando
|
357 |
"timestamp": datetime.utcnow()
|
358 |
}
|
359 |
-
|
360 |
-
|
|
|
|
|
361 |
args=(task_id, mode, topic, user_script, music),
|
362 |
daemon=True
|
363 |
)
|
364 |
-
|
|
|
|
|
365 |
while TASKS[task_id]["status"] == "processing":
|
366 |
yield TASKS[task_id]['progress_log'], None, None
|
367 |
time.sleep(1)
|
|
|
|
|
368 |
if TASKS[task_id]["status"] == "error":
|
369 |
-
yield
|
370 |
elif TASKS[task_id]["status"] == "done":
|
371 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
372 |
|
373 |
-
#
|
374 |
-
with gr.Blocks(title="Generador de
|
375 |
-
gr.Markdown("
|
376 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
377 |
with gr.Row():
|
378 |
with gr.Column(scale=2):
|
|
|
|
|
379 |
mode_radio = gr.Radio(
|
380 |
-
["Generar Guion con IA", "Usar Mi Guion"],
|
381 |
value="Generar Guion con IA",
|
382 |
-
label="
|
383 |
)
|
384 |
-
|
385 |
-
|
386 |
-
|
|
|
|
|
387 |
)
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
|
|
393 |
)
|
394 |
-
|
395 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
396 |
with gr.Column(scale=2):
|
397 |
-
gr.Markdown("
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
|
|
|
|
402 |
)
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
gr.
|
410 |
-
|
411 |
-
|
|
|
|
|
412 |
mode_radio.change(
|
413 |
-
|
414 |
-
inputs=mode_radio,
|
415 |
-
outputs=[
|
416 |
)
|
417 |
-
|
418 |
-
|
419 |
-
fn=
|
420 |
-
inputs=[mode_radio,
|
421 |
-
outputs=[
|
422 |
)
|
423 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
424 |
if __name__ == "__main__":
|
425 |
-
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import torch
|
|
|
|
|
3 |
import soundfile as sf
|
4 |
import edge_tts
|
5 |
+
import asyncio
|
6 |
from transformers import GPT2Tokenizer, GPT2LMHeadModel
|
7 |
from keybert import KeyBERT
|
8 |
from moviepy.editor import (
|
|
|
30 |
import time
|
31 |
from datetime import datetime, timedelta
|
32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
# ------------------- Configuration & globals -------------------
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# The Pexels key is read from the environment; a missing key is only warned about here.
PEXELS_API_KEY = os.getenv("PEXELS_API_KEY")
if not PEXELS_API_KEY:
    logger.warning("PEXELS_API_KEY no definido. Los videos no funcionarán.")

# Model singletons start empty and are filled in by the lazy get_* loaders.
tokenizer = gpt2_model = kw_model = None

# Output directory for rendered videos; created at import time if missing.
RESULTS_DIR = "video_results"
os.makedirs(RESULTS_DIR, exist_ok=True)

# task_id -> mutable state dict for that task.
TASKS = {}
|
45 |
|
46 |
+
# ------------------- Motor Edge TTS -------------------
|
47 |
+
class EdgeTTSEngine:
    """Synchronous wrapper around ``edge_tts`` speech synthesis.

    Failures are logged and reported through a ``False`` return value
    instead of being raised to the caller.
    """

    def __init__(self, voice="es-ES-AlvaroNeural"):
        """Store the voice name used for every synthesis request."""
        self.voice = voice
        logger.info(f"Inicializando Edge TTS con voz: {voice}")

    async def _synthesize_async(self, text, output_path):
        """Synthesize ``text`` and save the audio to ``output_path``.

        Returns:
            True when ``edge_tts`` finished saving, False if it raised.
        """
        try:
            communicate = edge_tts.Communicate(text, self.voice)
            await communicate.save(output_path)
        except Exception as e:
            # logger.exception keeps the traceback, which the plain message lost.
            logger.exception(f"Error en Edge TTS: {e}")
            return False
        return True

    def synthesize(self, text, output_path):
        """Blocking entry point: synthesize ``text`` into ``output_path``.

        Args:
            text: Text to speak. Blank or whitespace-only text is rejected
                up front, without contacting the service.
            output_path: Destination file for the audio.

        Returns:
            True on success, False on any failure.
        """
        if not text or not text.strip():
            logger.error("Error al sintetizar con Edge TTS: texto vacío")
            return False

        try:
            # asyncio.run creates a fresh event loop for this call; it raises
            # RuntimeError when a loop is already running in the current
            # thread, which is reported as a failure below.
            return asyncio.run(self._synthesize_async(text, output_path))
        except Exception as e:
            logger.exception(f"Error al sintetizar con Edge TTS: {e}")
            return False


# Global TTS engine instance
tts_engine = EdgeTTSEngine()
|
73 |
+
|
74 |
# ------------------- Carga Perezosa de Modelos -------------------
|
75 |
def get_tokenizer():
|
76 |
global tokenizer
|
77 |
if tokenizer is None:
|
78 |
+
logger.info("Cargando tokenizer GPT2 español...")
|
79 |
tokenizer = GPT2Tokenizer.from_pretrained("datificate/gpt2-small-spanish")
|
80 |
if tokenizer.pad_token is None:
|
81 |
tokenizer.pad_token = tokenizer.eos_token
|
|
|
84 |
def get_gpt2_model():
    """Return the shared Spanish GPT-2 model, loading it on first call.

    The model is put in eval mode and cached in the module-level
    ``gpt2_model`` variable.
    """
    global gpt2_model
    if gpt2_model is not None:
        return gpt2_model

    logger.info("Cargando modelo GPT-2 español...")
    loaded = GPT2LMHeadModel.from_pretrained("datificate/gpt2-small-spanish")
    gpt2_model = loaded.eval()
    return gpt2_model
|
90 |
|
91 |
def get_kw_model():
    """Return the shared KeyBERT model, creating it on first call.

    The instance is cached in the module-level ``kw_model`` variable.
    """
    global kw_model
    if kw_model is not None:
        return kw_model

    logger.info("Cargando modelo KeyBERT multilingüe...")
    kw_model = KeyBERT("paraphrase-multilingual-MiniLM-L12-v2")
    return kw_model
|
97 |
|
98 |
+
# ------------------- Funciones del Pipeline -------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
def update_task_progress(task_id, message):
    """Store ``message`` as the latest progress line of a task and log it.

    Task ids that are not present in ``TASKS`` are ignored.
    """
    # NOTE(review): the log call is assumed to sit inside the membership
    # check, as in the usual layout of this helper - confirm against the file.
    if task_id not in TASKS:
        return
    TASKS[task_id]['progress_log'] = message
    logger.info(f"[{task_id}] {message}")
|
103 |
|
104 |
def gpt2_script(prompt: str) -> str:
    """Generate a short Spanish script about ``prompt`` with GPT-2.

    The prompt is wrapped in a fixed instruction ending in ``"sobre:"``, the
    model samples a continuation, and the decoded text that follows the
    first ``"sobre:"`` marker is returned.

    Args:
        prompt: Topic to write about.

    Returns:
        The text after the instruction marker; ``prompt`` itself when that
        text is empty; or a canned Spanish sentence mentioning the topic if
        anything in the generation path raises.
    """
    try:
        local_tokenizer = get_tokenizer()
        local_gpt2_model = get_gpt2_model()

        instruction = f"Escribe un guion corto y coherente sobre: {prompt}"
        inputs = local_tokenizer(instruction, return_tensors="pt", truncation=True, max_length=512)

        outputs = local_gpt2_model.generate(
            **inputs,
            # Budget of 160 tokens on top of the encoded instruction.
            max_length=160 + inputs["input_ids"].shape[1],
            do_sample=True,
            top_p=0.9,
            top_k=40,
            temperature=0.7,
            no_repeat_ngram_size=3,
            pad_token_id=local_tokenizer.pad_token_id,
            eos_token_id=local_tokenizer.eos_token_id,
        )

        text = local_tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Cut at the FIRST marker only. Splitting on every occurrence and
        # keeping the last piece discarded everything before a later
        # "sobre:" in the topic or in the generated text.
        _, marker, remainder = text.partition("sobre:")
        generated = (remainder if marker else text).strip()
        return generated if generated else prompt

    except Exception as e:
        # Deliberate best-effort: a failed generation still yields a usable script.
        logger.exception(f"Error generando guión: {e}")
        return f"Hoy hablaremos sobre {prompt}. Este es un tema fascinante que merece nuestra atención."
|
132 |
|
133 |
+
def generate_tts_audio(text: str, output_path: str) -> bool:
    """Synthesize ``text`` to ``output_path`` with the global TTS engine.

    Returns:
        True only when the engine reports success and the output file
        exists with a non-zero size; False otherwise, including when an
        exception is raised.
    """
    try:
        logger.info("Generando audio con Edge TTS...")
        synthesized = tts_engine.synthesize(text, output_path)
        file_ok = synthesized and os.path.exists(output_path) and os.path.getsize(output_path) > 0
        if not file_ok:
            logger.error("El archivo de audio no se generó correctamente")
            return False

        logger.info(f"Audio generado exitosamente: {output_path}")
        return True
    except Exception as e:
        logger.error(f"Error generando TTS: {e}")
        return False
|
147 |
|
148 |
+
# Common Spanish function words removed before keyword ranking.
# scikit-learn only ships a built-in stop list for "english", so the Spanish
# list has to be passed explicitly.
_SPANISH_STOP_WORDS = [
    "al", "algo", "ante", "antes", "como", "con", "contra", "cual", "cuando",
    "de", "del", "desde", "donde", "durante", "el", "ella", "ellas", "ellos",
    "en", "entre", "era", "es", "esa", "ese", "eso", "esta", "este", "esto",
    "está", "están", "fue", "ha", "han", "hasta", "hay", "la", "las", "le",
    "les", "lo", "los", "más", "me", "mi", "muy", "nada", "ni", "no", "nos",
    "os", "otra", "otro", "para", "pero", "poco", "por", "porque", "que",
    "qué", "quien", "se", "ser", "si", "sí", "sin", "sobre", "son", "su",
    "sus", "también", "tanto", "te", "tiene", "todo", "todos", "tu", "un",
    "una", "uno", "unos", "unas", "ya", "yo",
]


def extract_keywords(text: str) -> list[str]:
    """Extract up to five search keywords from ``text`` for the video lookup.

    The text is lower-cased and stripped of punctuation, then ranked with
    KeyBERT using a Spanish stop-word list.

    Args:
        text: Script text to analyse.

    Returns:
        Keywords with internal spaces replaced by ``+``. Falls back to
        ``["naturaleza", "paisaje"]`` when nothing is extracted and to
        ``["naturaleza", "paisaje", "ciudad"]`` when extraction raises.
    """
    try:
        local_kw_model = get_kw_model()
        clean_text = re.sub(r"[^\w\sáéíóúñÁÉÍÓÚÑ]", "", text.lower())
        # The string "spanish" is not a valid stop list for the underlying
        # vectorizer, which made every call end in the fallback keywords.
        kws = local_kw_model.extract_keywords(
            clean_text,
            stop_words=_SPANISH_STOP_WORDS,
            top_n=5,
        )
        keywords = [k.replace(" ", "+") for k, _ in kws if k]
        return keywords if keywords else ["naturaleza", "paisaje"]
    except Exception as e:
        # Best-effort: generic keywords keep the pipeline going.
        logger.error(f"Error extrayendo keywords: {e}")
        return ["naturaleza", "paisaje", "ciudad"]
|
159 |
|
160 |
+
def search_pexels_videos(query: str, count: int = 3) -> list[dict]:
    """Query the Pexels video search endpoint for landscape videos.

    Args:
        query: Search term.
        count: Value sent as ``per_page``.

    Returns:
        The ``videos`` list of the JSON response, or an empty list when no
        API key is configured or the request fails in any way.
    """
    if not PEXELS_API_KEY:
        return []

    request_options = {
        "headers": {"Authorization": PEXELS_API_KEY},
        "params": {"query": query, "per_page": count, "orientation": "landscape"},
        "timeout": 20,
    }
    try:
        response = requests.get("https://api.pexels.com/videos/search", **request_options)
        response.raise_for_status()
        payload = response.json()
        return payload.get("videos", [])
    except Exception as e:
        logger.error(f"Error buscando videos en Pexels: {e}")
        return []
|
177 |
|
178 |
+
def download_video(url: str, folder: str) -> str | None:
    """Stream the video at ``url`` into a uniquely named ``.mp4`` in ``folder``.

    Returns:
        The path of the downloaded file, or None when the request fails or
        the resulting file is missing or not larger than 1000 bytes.
    """
    try:
        filepath = os.path.join(folder, f"{uuid.uuid4().hex}.mp4")

        with requests.get(url, stream=True, timeout=60) as response:
            response.raise_for_status()
            with open(filepath, "wb") as f:
                # 1 MiB chunks
                for chunk in response.iter_content(chunk_size=1024*1024):
                    f.write(chunk)

        looks_valid = os.path.exists(filepath) and os.path.getsize(filepath) > 1000
        if looks_valid:
            return filepath

        logger.error(f"Archivo descargado inválido: {filepath}")
        return None

    except Exception as e:
        logger.error(f"Error descargando video {url}: {e}")
        return None
|
199 |
|
200 |
+
def create_subtitle_clips(script: str, video_width: int, video_height: int, duration: float):
    """Build timed subtitle clips, one per sentence of ``script``.

    The script is split on sentence punctuation and ``duration`` is shared
    among the sentences in proportion to their word counts. Sentences whose
    slot is shorter than 0.5 s produce no clip, but their slot still elapses
    so later subtitles stay aligned with the narration.

    Args:
        script: Full narration text.
        video_width: Frame width; captions wrap at 90% of it.
        video_height: Frame height; the font size is 5% of it, minimum 20.
        duration: Total time in seconds to distribute over the sentences.

    Returns:
        A list of positioned text clips. Empty when the script has no
        sentences or when building any clip raises.
    """
    try:
        sentences = [s.strip() for s in re.split(r"[.!?¿¡]", script) if s.strip()]
        if not sentences:
            return []

        total_words = sum(len(s.split()) for s in sentences) or 1
        time_per_word = duration / total_words

        # Loop-invariant layout values.
        font_size = max(20, int(video_height * 0.05))
        caption_size = (int(video_width * 0.9), None)

        clips = []
        current_time = 0.0

        for sentence in sentences:
            sentence_duration = len(sentence.split()) * time_per_word
            start_time = current_time
            # Advance the clock before deciding whether to skip: leaving it
            # untouched for skipped sentences shifted every later subtitle
            # ahead of the audio.
            current_time += sentence_duration

            if sentence_duration < 0.5:
                continue

            txt_clip = (
                TextClip(
                    sentence,
                    fontsize=font_size,
                    color="white",
                    stroke_color="black",
                    stroke_width=2,
                    method="caption",
                    size=caption_size,
                    font="Arial-Bold",
                )
                .set_start(start_time)
                .set_duration(sentence_duration)
                .set_position(("center", "bottom"))
            )
            clips.append(txt_clip)

        return clips
    except Exception as e:
        # Best-effort: a subtitle failure must not abort the whole render.
        logger.error(f"Error creando subtítulos: {e}")
        return []
|
242 |
|
243 |
+
def loop_audio_to_duration(audio_clip: AudioFileClip, target_duration: float) -> AudioFileClip:
    """Return ``audio_clip`` trimmed or repeated to last ``target_duration`` seconds.

    A clip that is already long enough is simply cut. A shorter clip is
    concatenated with itself enough times and then cut. If anything raises,
    the original clip is returned unchanged.
    """
    try:
        if audio_clip.duration < target_duration:
            repeats = math.ceil(target_duration / audio_clip.duration)
            repeated = concatenate_audioclips([audio_clip] * repeats)
            return repeated.subclip(0, target_duration)

        return audio_clip.subclip(0, target_duration)
    except Exception as e:
        logger.error(f"Error haciendo loop del audio: {e}")
        return audio_clip
|
255 |
|
256 |
+
def create_video(script_text: str, generate_script: bool, music_path: str | None, task_id: str) -> str:
    """Build a narrated, subtitled stock-footage video and return its path.

    Pipeline: prepare the script, synthesize the narration, download Pexels
    footage for the script keywords, trim and concatenate it to the
    narration length, optionally mix background music, add subtitles and
    render the result into ``RESULTS_DIR``.

    Args:
        script_text: Topic (when ``generate_script`` is true) or full script.
        generate_script: Whether to generate the script with GPT-2.
        music_path: Optional background music file.
        task_id: Task whose progress log is updated along the way.

    Returns:
        Path of the rendered MP4.

    Raises:
        ValueError: Empty script, or narration shorter than one second.
        RuntimeError: TTS failed, or no stock video could be downloaded or
            opened.
        Exception: Anything else raised by the pipeline is logged and
            re-raised.
    """
    temp_dir = tempfile.mkdtemp()
    # Every clip opened below is registered here so the ``finally`` block can
    # close it on success AND on failure (previously failures leaked them).
    open_clips = []

    try:
        # Step 1: generate or take the script
        update_task_progress(task_id, "Paso 1/7: Preparando guión...")
        if generate_script:
            script = gpt2_script(script_text)
        else:
            script = script_text.strip()

        if not script:
            raise ValueError("El guión está vacío")

        # Step 2: narration
        update_task_progress(task_id, "Paso 2/7: Generando audio con Edge TTS...")
        # NOTE(review): the file is named .wav; the audio container actually
        # written by edge_tts is presumably different - confirm.
        audio_path = os.path.join(temp_dir, "voice.wav")

        if not generate_tts_audio(script, audio_path):
            raise RuntimeError("Error generando el audio TTS")

        voice_clip = AudioFileClip(audio_path)
        open_clips.append(voice_clip)
        video_duration = voice_clip.duration

        if video_duration < 1:
            raise ValueError("El audio generado es demasiado corto")

        # Step 3: search and download stock footage
        update_task_progress(task_id, "Paso 3/7: Buscando videos en Pexels...")
        video_paths = []
        keywords = extract_keywords(script)[:3]  # at most 3 keywords

        for i, keyword in enumerate(keywords):
            update_task_progress(task_id, f"Paso 3/7: Buscando videos para '{keyword}' ({i+1}/{len(keywords)})")

            for video_data in search_pexels_videos(keyword, 2):
                if len(video_paths) >= 6:  # at most 6 videos
                    break

                video_files = video_data.get("video_files", [])
                if not video_files:
                    continue

                # Pick the widest rendition.
                best_file = max(video_files, key=lambda f: f.get("width", 0))
                video_url = best_file.get("link")
                if not video_url:
                    continue

                downloaded_path = download_video(video_url, temp_dir)
                if downloaded_path:
                    video_paths.append(downloaded_path)

        if not video_paths:
            raise RuntimeError("No se pudieron descargar videos de Pexels")

        # Step 4: open and trim the footage
        update_task_progress(task_id, f"Paso 4/7: Procesando {len(video_paths)} videos...")
        video_clips = []

        for path in video_paths:
            try:
                clip = VideoFileClip(path)
                open_clips.append(clip)
                # Use at most 8 seconds of each clip.
                duration = min(8, clip.duration)
                video_clips.append(clip.subclip(0, duration))
            except Exception as e:
                logger.error(f"Error procesando video {path}: {e}")
                continue

        if not video_clips:
            raise RuntimeError("No se pudieron procesar los videos")

        # NOTE(review): method="chain" is documented to apply no size
        # correction; if Pexels returns clips of differing resolutions this
        # may need method="compose" - confirm.
        base_video = concatenate_videoclips(video_clips, method="chain")

        # Repeat the footage when it is shorter than the narration.
        if base_video.duration < video_duration:
            loops_needed = math.ceil(video_duration / base_video.duration)
            base_video = concatenate_videoclips([base_video] * loops_needed)

        # Cut to the exact narration length.
        base_video = base_video.subclip(0, video_duration)
        open_clips.append(base_video)

        # Step 5: final audio track
        update_task_progress(task_id, "Paso 5/7: Componiendo audio...")
        final_audio = voice_clip
        if music_path and os.path.exists(music_path):
            try:
                music_source = AudioFileClip(music_path)
                open_clips.append(music_source)
                music_clip = loop_audio_to_duration(music_source, video_duration).volumex(0.2)
                final_audio = CompositeAudioClip([music_clip, voice_clip])
            except Exception as e:
                # Music is optional: fall back to narration only.
                logger.error(f"Error con música: {e}")
                final_audio = voice_clip

        # Step 6: subtitles
        update_task_progress(task_id, "Paso 6/7: Agregando subtítulos...")
        subtitle_clips = create_subtitle_clips(script, base_video.w, base_video.h, video_duration)

        # Step 7: render
        update_task_progress(task_id, "Paso 7/7: Renderizando video final...")
        final_video = CompositeVideoClip([base_video] + subtitle_clips).set_audio(final_audio)
        open_clips.append(final_video)

        output_path = os.path.join(RESULTS_DIR, f"video_{task_id}.mp4")
        final_video.write_videofile(
            output_path,
            fps=24,
            codec="libx264",
            audio_codec="aac",
            threads=2,
            logger=None,
            verbose=False,
        )

        return output_path

    except Exception as e:
        logger.error(f"Error creando video: {e}")
        raise
    finally:
        # Close clips (most recently opened first) before deleting the
        # temporary files they were opened from.
        for clip in reversed(open_clips):
            try:
                clip.close()
            except Exception as e:
                logger.warning(f"Error cerrando clip: {e}")
        # Best-effort removal of the scratch directory.
        shutil.rmtree(temp_dir, ignore_errors=True)
|
391 |
+
|
392 |
+
def worker_thread(task_id: str, mode: str, topic: str, user_script: str, music_path: str | None):
    """Run one video job and record its outcome in ``TASKS[task_id]``.

    On success the task is marked ``done`` with the output path; on any
    exception it is marked ``error`` with the exception text.
    """
    try:
        use_ai = mode == "Generar Guion con IA"
        source_text = topic if use_ai else user_script

        rendered_path = create_video(source_text, use_ai, music_path, task_id)

        success_state = {
            "status": "done",
            "result": rendered_path,
            "progress_log": "✅ ¡Video completado exitosamente!"
        }
        TASKS[task_id].update(success_state)

    except Exception as e:
        logger.error(f"Error en worker {task_id}: {e}")
        failure_state = {
            "status": "error",
            "error": str(e),
            "progress_log": f"❌ Error: {str(e)}"
        }
        TASKS[task_id].update(failure_state)
|
413 |
+
|
414 |
+
def generate_video_with_progress(mode, topic, user_script, music):
    """Generator that starts a video job and streams its progress.

    Yields ``(progress_text, video_path, download_path)`` tuples. While the
    job is processing, the two paths are None and the current progress line
    is yielded once per second. The final tuple carries the result path
    twice on success, or only the error text on failure. Blank input yields
    a single error tuple and no job is started.
    """
    # Validate the input for the selected mode.
    chosen_text = topic if mode == "Generar Guion con IA" else user_script
    if not (chosen_text and chosen_text.strip()):
        yield "❌ Error: Por favor, ingresa un tema o guion.", None, None
        return

    # Register the task.
    task_id = uuid.uuid4().hex[:8]
    TASKS[task_id] = {
        "status": "processing",
        "progress_log": "🚀 Iniciando generación de video...",
        "timestamp": datetime.utcnow()
    }

    # Run the job in a background thread.
    job = threading.Thread(
        target=worker_thread,
        args=(task_id, mode, topic, user_script, music),
        daemon=True
    )
    job.start()

    # Poll the task state once per second.
    while TASKS[task_id]["status"] == "processing":
        yield TASKS[task_id]['progress_log'], None, None
        time.sleep(1)

    # Emit the final state.
    final_status = TASKS[task_id]["status"]
    if final_status == "error":
        yield TASKS[task_id]['progress_log'], None, None
    elif final_status == "done":
        finished_path = TASKS[task_id]['result']
        yield TASKS[task_id]['progress_log'], finished_path, finished_path
|
449 |
+
|
450 |
+
# ------------------- Limpieza automática -------------------
|
451 |
+
def cleanup_old_files():
    """Loop forever, purging tasks older than 24 hours once per hour.

    For each expired task the result file, if it still exists, is removed
    and the task entry is dropped from ``TASKS``. Errors are logged and the
    loop keeps running.
    """
    max_age = timedelta(hours=24)
    while True:
        try:
            time.sleep(3600)  # 1 hour
            now = datetime.utcnow()
            logger.info("Ejecutando limpieza de archivos antiguos...")

            # Iterate over a snapshot because entries are deleted in the loop.
            for task_id, info in list(TASKS.items()):
                expired = "timestamp" in info and now - info["timestamp"] > max_age
                if not expired:
                    continue

                if info.get("result") and os.path.exists(info.get("result")):
                    try:
                        os.remove(info["result"])
                        logger.info(f"Archivo eliminado: {info['result']}")
                    except Exception as e:
                        logger.error(f"Error eliminando archivo: {e}")
                # NOTE(review): assumed to run for every expired task, not
                # only those with a result file - confirm against the file.
                del TASKS[task_id]

        except Exception as e:
            logger.error(f"Error en cleanup: {e}")


# Start the cleanup thread
threading.Thread(target=cleanup_old_files, daemon=True).start()
|
474 |
+
|
475 |
+
# ------------------- Interfaz Gradio -------------------
|
476 |
+
def toggle_input_fields(mode):
    """Return visibility updates for the topic box and the script box.

    The first update is visible only in AI-script mode, the second only in
    every other mode.
    """
    topic_update = gr.update(visible=mode == "Generar Guion con IA")
    script_update = gr.update(visible=mode != "Generar Guion con IA")
    return (topic_update, script_update)
|
482 |
|
483 |
+
# Crear interfaz
|
484 |
+
with gr.Blocks(theme=gr.themes.Soft(), title="🎬 Generador de Videos IA") as demo:
    # Page header
    gr.Markdown("""
    # 🎬 Generador de Videos con IA

    Crea videos profesionales a partir de texto usando:
    - **Edge TTS** para voz en español
    - **GPT-2** para generación de guiones
    - **Pexels API** para videos de stock
    - **Subtítulos automáticos** y efectos visuales

    El progreso se mostrará en tiempo real.
    """)

    with gr.Row():
        # Left column: inputs
        with gr.Column(scale=2):
            gr.Markdown("### ⚙️ Configuración")

            mode_radio = gr.Radio(
                label="Método de creación",
                choices=["Generar Guion con IA", "Usar Mi Guion"],
                value="Generar Guion con IA",
            )

            # Shown in AI mode (the default).
            topic_input = gr.Textbox(
                lines=2,
                label="💡 Tema para la IA",
                placeholder="Ej: Los misterios del océano profundo",
            )

            # Hidden until the user switches to their own script.
            script_input = gr.Textbox(
                lines=8,
                visible=False,
                label="📝 Tu Guion Completo",
                placeholder="Escribe aquí tu guion personalizado...",
            )

            music_input = gr.Audio(
                label="🎵 Música de fondo (opcional)",
                type="filepath",
            )

            generate_btn = gr.Button(
                "🎬 Generar Video",
                size="lg",
                variant="primary",
            )

        # Right column: progress and outputs
        with gr.Column(scale=2):
            gr.Markdown("### 📊 Progreso y Resultados")

            progress_output = gr.Textbox(
                label="📋 Log de progreso en tiempo real",
                interactive=False,
                show_copy_button=True,
                lines=12,
            )

            video_output = gr.Video(
                height=400,
                label="🎥 Video generado",
            )

            download_output = gr.File(
                label="📥 Descargar archivo",
            )

    # Event wiring: the mode switch toggles which text box is visible.
    mode_radio.change(
        fn=toggle_input_fields,
        inputs=[mode_radio],
        outputs=[topic_input, script_input],
    )

    # The button streams progress, then the final video and download file.
    generate_btn.click(
        fn=generate_video_with_progress,
        inputs=[mode_radio, topic_input, script_input, music_input],
        outputs=[progress_output, video_output, download_output],
    )

    # Usage instructions
    gr.Markdown("""
    ### 📋 Instrucciones:
    1. **Elige el método**: Genera un guion con IA o usa el tuyo propio
    2. **Configura el contenido**: Ingresa un tema interesante o tu guion
    3. **Música opcional**: Sube un archivo de audio para fondo musical
    4. **Genera**: Presiona el botón y observa el progreso en tiempo real

    ⏱️ **Tiempo estimado**: 2-5 minutos dependiendo de la duración del contenido.
    """)
|
572 |
+
|
573 |
+
# Ejecutar aplicación
|
574 |
if __name__ == "__main__":
    logger.info("🚀 Iniciando aplicación Generador de Videos IA...")
    # Listen on all interfaces, port 7860, with the API page disabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_api": False,
    }
    demo.launch(**launch_options)
|