Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,6 +5,7 @@ import tempfile
|
|
| 5 |
import requests
|
| 6 |
from datetime import datetime
|
| 7 |
import edge_tts
|
|
|
|
| 8 |
import gradio as gr
|
| 9 |
import torch
|
| 10 |
from transformers import GPT2Tokenizer, GPT2LMHeadModel
|
|
@@ -104,15 +105,15 @@ def get_voice_choices():
|
|
| 104 |
|
| 105 |
# Obtener las voces al inicio del script
|
| 106 |
AVAILABLE_VOICES = get_voice_choices()
|
| 107 |
-
DEFAULT_VOICE_ID = "es-
|
| 108 |
DEFAULT_VOICE_NAME = DEFAULT_VOICE_ID
|
| 109 |
for text, voice_id in AVAILABLE_VOICES:
|
| 110 |
if voice_id == DEFAULT_VOICE_ID:
|
| 111 |
DEFAULT_VOICE_NAME = text
|
| 112 |
break
|
| 113 |
if DEFAULT_VOICE_ID not in [v[1] for v in AVAILABLE_VOICES]:
|
| 114 |
-
DEFAULT_VOICE_ID = AVAILABLE_VOICES[0][1] if AVAILABLE_VOICES else "
|
| 115 |
-
DEFAULT_VOICE_NAME = AVAILABLE_VOICES[0][0] if AVAILABLE_VOICES else "
|
| 116 |
logger.info(f"Voz por defecto seleccionada (ID): {DEFAULT_VOICE_ID}")
|
| 117 |
|
| 118 |
# Clave API de Pexels
|
|
@@ -243,12 +244,22 @@ async def text_to_speech(text, output_path, voice):
|
|
| 243 |
communicate = edge_tts.Communicate(text, voice)
|
| 244 |
await communicate.save(output_path)
|
| 245 |
if os.path.exists(output_path) and os.path.getsize(output_path) > 100:
|
| 246 |
-
logger.info(f"Audio guardado exitosamente en: {output_path}")
|
| 247 |
return True
|
| 248 |
-
logger.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
return False
|
| 250 |
except Exception as e:
|
| 251 |
-
logger.error(f"Error en
|
| 252 |
return False
|
| 253 |
|
| 254 |
def download_video_file(url, temp_dir):
|
|
@@ -364,22 +375,41 @@ async def crear_video_async(prompt_type, input_text, selected_voice, musica_file
|
|
| 364 |
|
| 365 |
# 2. Generar audio de voz
|
| 366 |
voz_path = os.path.join(temp_dir_intermediate, "voz.mp3")
|
| 367 |
-
tts_voices_to_try = [selected_voice, "es-MX-DaliaNeural"]
|
| 368 |
tts_success = False
|
|
|
|
|
|
|
|
|
|
| 369 |
|
| 370 |
for current_voice in tts_voices_to_try:
|
| 371 |
logger.info(f"Intentando TTS con voz: {current_voice}")
|
| 372 |
try:
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 378 |
except Exception as e:
|
| 379 |
logger.error(f"Error en TTS con voz '{current_voice}': {str(e)}")
|
| 380 |
|
| 381 |
if not tts_success or not os.path.exists(voz_path) or os.path.getsize(voz_path) <= 100:
|
| 382 |
-
raise ValueError(f"Error generando voz. Intentos con {tts_voices_to_try} fallaron.")
|
| 383 |
|
| 384 |
temp_intermediate_files.append(voz_path)
|
| 385 |
audio_tts_original = AudioFileClip(voz_path)
|
|
@@ -477,7 +507,7 @@ async def crear_video_async(prompt_type, input_text, selected_voice, musica_file
|
|
| 477 |
if musica_file:
|
| 478 |
try:
|
| 479 |
music_path = os.path.join(temp_dir_intermediate, "musica_bg.mp3")
|
| 480 |
-
shutil.copyfile(musica_file, music_path)
|
| 481 |
temp_intermediate_files.append(music_path)
|
| 482 |
musica_audio_original = AudioFileClip(music_path)
|
| 483 |
if musica_audio_original.duration > 0:
|
|
@@ -497,7 +527,9 @@ async def crear_video_async(prompt_type, input_text, selected_voice, musica_file
|
|
| 497 |
video_final = video_base.set_audio(final_audio)
|
| 498 |
output_filename = f"video_{int(datetime.now().timestamp())}.mp4"
|
| 499 |
output_path = os.path.join(temp_dir_intermediate, output_filename)
|
| 500 |
-
|
|
|
|
|
|
|
| 501 |
|
| 502 |
video_final.write_videofile(
|
| 503 |
output_path,
|
|
@@ -510,11 +542,13 @@ async def crear_video_async(prompt_type, input_text, selected_voice, musica_file
|
|
| 510 |
logger='bar'
|
| 511 |
)
|
| 512 |
|
| 513 |
-
shutil.
|
| 514 |
-
|
|
|
|
|
|
|
| 515 |
total_time = (datetime.now() - start_time).total_seconds()
|
| 516 |
logger.info(f"Video generado en {total_time:.2f}s")
|
| 517 |
-
return
|
| 518 |
|
| 519 |
except ValueError as ve:
|
| 520 |
logger.error(f"Error controlado: {str(ve)}")
|
|
@@ -554,11 +588,16 @@ async def crear_video_async(prompt_type, input_text, selected_voice, musica_file
|
|
| 554 |
except:
|
| 555 |
pass
|
| 556 |
for path in temp_intermediate_files:
|
| 557 |
-
if os.path.isfile(path) and path !=
|
| 558 |
try:
|
| 559 |
os.remove(path)
|
| 560 |
except:
|
| 561 |
logger.warning(f"No se pudo eliminar {path}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 562 |
|
| 563 |
async def run_app_async(prompt_type, prompt_ia, prompt_manual, musica_file, selected_voice):
|
| 564 |
logger.info("="*80)
|
|
@@ -579,12 +618,12 @@ async def run_app_async(prompt_type, prompt_ia, prompt_manual, musica_file, sele
|
|
| 579 |
|
| 580 |
try:
|
| 581 |
logger.info("Iniciando generación de video...")
|
| 582 |
-
video_path = await crear_video_async(prompt_type, input_text, selected_voice, musica_file)
|
| 583 |
if video_path and os.path.exists(video_path):
|
| 584 |
output_video = video_path
|
| 585 |
output_file = video_path
|
| 586 |
-
status_msg = gr.update(value=f"✅ Video generado exitosamente. Descarga
|
| 587 |
-
logger.info(f"Retornando video_path: {video_path}")
|
| 588 |
else:
|
| 589 |
status_msg = gr.update(value="❌ Error: Falló la generación del video.")
|
| 590 |
logger.error("No se generó video_path válido.")
|
|
@@ -658,22 +697,21 @@ with gr.Blocks(title="Generador de Videos con IA", theme=gr.themes.Soft()) as ap
|
|
| 658 |
)
|
| 659 |
|
| 660 |
prompt_type.change(
|
| 661 |
-
lambda x: (gr.update(visible=x == "Generar Guion con IA"), gr.update(visible=x == "Usar Mi Guion")),
|
| 662 |
inputs=prompt_type,
|
| 663 |
outputs=[ia_guion_column, manual_guion_column]
|
| 664 |
)
|
| 665 |
|
| 666 |
generate_btn.click(
|
| 667 |
-
lambda: (None, None, gr.update(value="⏳ Procesando... Esto puede tomar hasta 1 hora.")),
|
| 668 |
outputs=[video_output, file_output, status_output]
|
| 669 |
).then(
|
| 670 |
-
run_app,
|
| 671 |
inputs=[prompt_type, prompt_ia, prompt_manual, musica_input, voice_dropdown],
|
| 672 |
outputs=[video_output, file_output, status_output],
|
| 673 |
-
queue=True
|
| 674 |
-
_js="() => { setTimeout(() => window.location.reload(), 3600000); }"
|
| 675 |
).then(
|
| 676 |
-
lambda video_path, file_output, status_msg: gr.update(visible=file_output.value is not None),
|
| 677 |
inputs=[video_output, file_output, status_output],
|
| 678 |
outputs=[file_output]
|
| 679 |
)
|
|
@@ -685,7 +723,7 @@ with gr.Blocks(title="Generador de Videos con IA", theme=gr.themes.Soft()) as ap
|
|
| 685 |
3. Sube música (opcional).
|
| 686 |
4. Selecciona la voz.
|
| 687 |
5. Haz clic en "✨ Generar Video".
|
| 688 |
-
6. Revisa el estado. Si el video se genera, estará disponible en /
|
| 689 |
7. Consulta `video_generator_full.log` para detalles.
|
| 690 |
""")
|
| 691 |
|
|
|
|
| 5 |
import requests
|
| 6 |
from datetime import datetime
|
| 7 |
import edge_tts
|
| 8 |
+
from gtts import gTTS
|
| 9 |
import gradio as gr
|
| 10 |
import torch
|
| 11 |
from transformers import GPT2Tokenizer, GPT2LMHeadModel
|
|
|
|
| 105 |
|
| 106 |
# Obtener las voces al inicio del script
|
| 107 |
AVAILABLE_VOICES = get_voice_choices()
|
| 108 |
+
DEFAULT_VOICE_ID = "es-MX-DaliaNeural" # Cambiado a una voz más estable
|
| 109 |
DEFAULT_VOICE_NAME = DEFAULT_VOICE_ID
|
| 110 |
for text, voice_id in AVAILABLE_VOICES:
|
| 111 |
if voice_id == DEFAULT_VOICE_ID:
|
| 112 |
DEFAULT_VOICE_NAME = text
|
| 113 |
break
|
| 114 |
if DEFAULT_VOICE_ID not in [v[1] for v in AVAILABLE_VOICES]:
|
| 115 |
+
DEFAULT_VOICE_ID = AVAILABLE_VOICES[0][1] if AVAILABLE_VOICES else "es-MX-DaliaNeural"
|
| 116 |
+
DEFAULT_VOICE_NAME = AVAILABLE_VOICES[0][0] if AVAILABLE_VOICES else "Dalia (México) - Femenino"
|
| 117 |
logger.info(f"Voz por defecto seleccionada (ID): {DEFAULT_VOICE_ID}")
|
| 118 |
|
| 119 |
# Clave API de Pexels
|
|
|
|
| 244 |
communicate = edge_tts.Communicate(text, voice)
|
| 245 |
await communicate.save(output_path)
|
| 246 |
if os.path.exists(output_path) and os.path.getsize(output_path) > 100:
|
| 247 |
+
logger.info(f"Audio guardado exitosamente con edge_tts en: {output_path}")
|
| 248 |
return True
|
| 249 |
+
logger.warning(f"edge_tts falló, intentando gTTS...")
|
| 250 |
+
except Exception as e:
|
| 251 |
+
logger.error(f"Error en edge_tts con voz '{voice}': {str(e)}")
|
| 252 |
+
|
| 253 |
+
try:
|
| 254 |
+
tts = gTTS(text=text, lang='es')
|
| 255 |
+
tts.save(output_path)
|
| 256 |
+
if os.path.exists(output_path) and os.path.getsize(output_path) > 100:
|
| 257 |
+
logger.info(f"Audio guardado exitosamente con gTTS en: {output_path}")
|
| 258 |
+
return True
|
| 259 |
+
logger.error(f"gTTS falló o archivo vacío en: {output_path}")
|
| 260 |
return False
|
| 261 |
except Exception as e:
|
| 262 |
+
logger.error(f"Error en gTTS: {str(e)}")
|
| 263 |
return False
|
| 264 |
|
| 265 |
def download_video_file(url, temp_dir):
|
|
|
|
| 375 |
|
| 376 |
# 2. Generar audio de voz
|
| 377 |
voz_path = os.path.join(temp_dir_intermediate, "voz.mp3")
|
| 378 |
+
tts_voices_to_try = [selected_voice, "es-MX-DaliaNeural"]
|
| 379 |
tts_success = False
|
| 380 |
+
max_chunk_length = 1000
|
| 381 |
+
text_chunks = [guion[i:i + max_chunk_length] for i in range(0, len(guion), max_chunk_length)]
|
| 382 |
+
logger.info(f"Texto dividido en {len(text_chunks)} fragmentos para TTS")
|
| 383 |
|
| 384 |
for current_voice in tts_voices_to_try:
|
| 385 |
logger.info(f"Intentando TTS con voz: {current_voice}")
|
| 386 |
try:
|
| 387 |
+
temp_audio_files = []
|
| 388 |
+
for i, chunk in enumerate(text_chunks):
|
| 389 |
+
temp_path = os.path.join(temp_dir_intermediate, f"voz_chunk_{i}.mp3")
|
| 390 |
+
tts_success = await text_to_speech(chunk, temp_path, current_voice)
|
| 391 |
+
if tts_success and os.path.exists(temp_path) and os.path.getsize(temp_path) > 100:
|
| 392 |
+
temp_audio_files.append(temp_path)
|
| 393 |
+
else:
|
| 394 |
+
logger.warning(f"TTS falló para fragmento {i} con voz: {current_voice}")
|
| 395 |
+
break
|
| 396 |
+
if len(temp_audio_files) == len(text_chunks):
|
| 397 |
+
audio_clips = [AudioFileClip(f) for f in temp_audio_files]
|
| 398 |
+
concatenated_audio = concatenate_audioclips(audio_clips)
|
| 399 |
+
concatenated_audio.write_audiofile(voz_path, codec='mp3')
|
| 400 |
+
concatenated_audio.close()
|
| 401 |
+
for clip in audio_clips:
|
| 402 |
+
clip.close()
|
| 403 |
+
tts_success = os.path.exists(voz_path) and os.path.getsize(voz_path) > 100
|
| 404 |
+
temp_intermediate_files.extend(temp_audio_files)
|
| 405 |
+
if tts_success:
|
| 406 |
+
logger.info(f"TTS exitoso con voz: {current_voice}")
|
| 407 |
+
break
|
| 408 |
except Exception as e:
|
| 409 |
logger.error(f"Error en TTS con voz '{current_voice}': {str(e)}")
|
| 410 |
|
| 411 |
if not tts_success or not os.path.exists(voz_path) or os.path.getsize(voz_path) <= 100:
|
| 412 |
+
raise ValueError(f"Error generando voz. Intentos con {tts_voices_to_try} y gTTS fallaron.")
|
| 413 |
|
| 414 |
temp_intermediate_files.append(voz_path)
|
| 415 |
audio_tts_original = AudioFileClip(voz_path)
|
|
|
|
| 507 |
if musica_file:
|
| 508 |
try:
|
| 509 |
music_path = os.path.join(temp_dir_intermediate, "musica_bg.mp3")
|
| 510 |
+
shutil.copyfile(musica_file.name if hasattr(musica_file, 'name') else musica_file, music_path)
|
| 511 |
temp_intermediate_files.append(music_path)
|
| 512 |
musica_audio_original = AudioFileClip(music_path)
|
| 513 |
if musica_audio_original.duration > 0:
|
|
|
|
| 527 |
video_final = video_base.set_audio(final_audio)
|
| 528 |
output_filename = f"video_{int(datetime.now().timestamp())}.mp4"
|
| 529 |
output_path = os.path.join(temp_dir_intermediate, output_filename)
|
| 530 |
+
persistent_dir = "/data"
|
| 531 |
+
os.makedirs(persistent_dir, exist_ok=True)
|
| 532 |
+
persistent_path = os.path.join(persistent_dir, output_filename)
|
| 533 |
|
| 534 |
video_final.write_videofile(
|
| 535 |
output_path,
|
|
|
|
| 542 |
logger='bar'
|
| 543 |
)
|
| 544 |
|
| 545 |
+
shutil.move(output_path, persistent_path)
|
| 546 |
+
download_url = f"https://gnosticdev-invideo-basic.hf.space/file={persistent_path}"
|
| 547 |
+
logger.info(f"Video guardado en: {persistent_path}")
|
| 548 |
+
logger.info(f"URL de descarga: {download_url}")
|
| 549 |
total_time = (datetime.now() - start_time).total_seconds()
|
| 550 |
logger.info(f"Video generado en {total_time:.2f}s")
|
| 551 |
+
return persistent_path, download_url
|
| 552 |
|
| 553 |
except ValueError as ve:
|
| 554 |
logger.error(f"Error controlado: {str(ve)}")
|
|
|
|
| 588 |
except:
|
| 589 |
pass
|
| 590 |
for path in temp_intermediate_files:
|
| 591 |
+
if os.path.isfile(path) and path != persistent_path:
|
| 592 |
try:
|
| 593 |
os.remove(path)
|
| 594 |
except:
|
| 595 |
logger.warning(f"No se pudo eliminar {path}")
|
| 596 |
+
try:
|
| 597 |
+
if os.path.exists(temp_dir_intermediate):
|
| 598 |
+
shutil.rmtree(temp_dir_intermediate)
|
| 599 |
+
except:
|
| 600 |
+
logger.warning(f"No se pudo eliminar directorio temporal {temp_dir_intermediate}")
|
| 601 |
|
| 602 |
async def run_app_async(prompt_type, prompt_ia, prompt_manual, musica_file, selected_voice):
|
| 603 |
logger.info("="*80)
|
|
|
|
| 618 |
|
| 619 |
try:
|
| 620 |
logger.info("Iniciando generación de video...")
|
| 621 |
+
video_path, download_url = await crear_video_async(prompt_type, input_text, selected_voice, musica_file)
|
| 622 |
if video_path and os.path.exists(video_path):
|
| 623 |
output_video = video_path
|
| 624 |
output_file = video_path
|
| 625 |
+
status_msg = gr.update(value=f"✅ Video generado exitosamente. Descarga: {download_url}")
|
| 626 |
+
logger.info(f"Retornando video_path: {video_path}, URL: {download_url}")
|
| 627 |
else:
|
| 628 |
status_msg = gr.update(value="❌ Error: Falló la generación del video.")
|
| 629 |
logger.error("No se generó video_path válido.")
|
|
|
|
| 697 |
)
|
| 698 |
|
| 699 |
prompt_type.change(
|
| 700 |
+
fn=lambda x: (gr.update(visible=x == "Generar Guion con IA"), gr.update(visible=x == "Usar Mi Guion")),
|
| 701 |
inputs=prompt_type,
|
| 702 |
outputs=[ia_guion_column, manual_guion_column]
|
| 703 |
)
|
| 704 |
|
| 705 |
generate_btn.click(
|
| 706 |
+
fn=lambda: (None, None, gr.update(value="⏳ Procesando... Esto puede tomar hasta 1 hora.")),
|
| 707 |
outputs=[video_output, file_output, status_output]
|
| 708 |
).then(
|
| 709 |
+
fn=run_app,
|
| 710 |
inputs=[prompt_type, prompt_ia, prompt_manual, musica_input, voice_dropdown],
|
| 711 |
outputs=[video_output, file_output, status_output],
|
| 712 |
+
queue=True
|
|
|
|
| 713 |
).then(
|
| 714 |
+
fn=lambda video_path, file_output, status_msg: gr.update(visible=file_output.value is not None),
|
| 715 |
inputs=[video_output, file_output, status_output],
|
| 716 |
outputs=[file_output]
|
| 717 |
)
|
|
|
|
| 723 |
3. Sube música (opcional).
|
| 724 |
4. Selecciona la voz.
|
| 725 |
5. Haz clic en "✨ Generar Video".
|
| 726 |
+
6. Revisa el estado. Si el video se genera, estará disponible en /data.
|
| 727 |
7. Consulta `video_generator_full.log` para detalles.
|
| 728 |
""")
|
| 729 |
|