Spaces:
Sleeping
Sleeping
# --- Imports (grouped stdlib / third-party / local per convention) ---
import logging
import os
import time
import warnings
from pathlib import Path

import gradio as gr

import src.FVoiceTheme as FVoiceTheme
from src import utils

# --- CONSOLE-NOISE SUPPRESSION ---
# 1. Silence PyTorch's 'weight_norm' deprecation FutureWarning.
warnings.filterwarnings("ignore", category=FutureWarning)
# 2. Raise the log level of the chatty HTTP/async libraries.
for _noisy_logger in ("urllib3", "httpcore", "httpx", "asyncio"):
    logging.getLogger(_noisy_logger).setLevel(logging.WARNING)

# Import the TTS engine class AFTER the warning filters are installed so any
# import-time warnings it triggers are already suppressed.
from inference import TTS

# --- CONFIGURATION AND DYNAMIC MODEL MANAGEMENT ---
# Directories holding the model weights and their matching JSON configs.
MODEL_DIR = "./models/"
CONFIG_DIR = "./configs/"

# Temporary directory for the generated audio clips.
os.makedirs("temp_audio", exist_ok=True)

# Cache of loaded TTS engines keyed by model filename, to avoid reloading.
tts_engines_cache = {}
def get_available_models():
    """Scan MODEL_DIR and return the list of .pth / .onnx model filenames.

    Returns an empty list when the directory does not exist.
    """
    if not os.path.exists(MODEL_DIR):
        return []
    # str.endswith accepts a tuple of suffixes — one call covers both formats.
    return [f for f in os.listdir(MODEL_DIR) if f.endswith((".pth", ".onnx"))]
def get_model_info(model_name):
    """Build a one-line Markdown summary for a model: name, type, size, cache status.

    Returns placeholder text when no model is selected or the file is missing.
    """
    if not model_name:
        return "Selecciona un modelo para ver su información"
    model_path = os.path.join(MODEL_DIR, model_name)
    # Guard clause: bail out early when the file is gone.
    if not os.path.exists(model_path):
        return "Información no disponible"
    file_size = os.path.getsize(model_path) / (1024 * 1024)  # bytes -> MB
    file_type = "ONNX" if model_name.endswith(".onnx") else "PyTorch"
    status = "✅ Cargado" if model_name in tts_engines_cache else "⏳ Sin cargar"
    return f"**{model_name}** | {file_type} | {file_size:.1f} MB | {status}"
def load_engine(model_name):
    """Return the TTS engine for *model_name*, loading and caching it on first use.

    The matching config is located dynamically in CONFIG_DIR by swapping the
    model file's extension for ".json".

    Raises:
        FileNotFoundError: if either the model file or its config is missing.
    """
    engine = tts_engines_cache.get(model_name)
    if engine is not None:
        return engine

    print(f"🔄 Cargando modelo: {model_name}...")
    model_path = os.path.join(MODEL_DIR, model_name)
    base_name = os.path.splitext(model_name)[0]
    config_path = os.path.join(CONFIG_DIR, f"{base_name}.json")

    # Both the weights and their config must exist before constructing TTS.
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"No se encontró el archivo del modelo: {model_path}")
    if not os.path.exists(config_path):
        raise FileNotFoundError(f"No se encontró el archivo de configuración correspondiente: {config_path}")

    print(f"📁 Usando configuración: {config_path}")
    engine = TTS(
        config_path=config_path,
        model_path=model_path
    )
    tts_engines_cache[model_name] = engine
    print(f"✅ Modelo {model_name} cargado y cacheado.")
    return engine
def inference(model_name, prompt, progress=gr.Progress()):
    """Generate speech audio from *prompt* with the selected model.

    Loads the model through the engine cache if needed, synthesizes into a
    timestamped WAV under temp_audio/, and returns the 3-tuple consumed by
    the Gradio outputs: (audio_path | None, status_markdown, model_info_text).
    """
    if not model_name:
        return None, "⚠️ **Error:** Por favor, selecciona un modelo.", ""
    # Guard against None as well as empty/whitespace-only text: a None prompt
    # would otherwise raise AttributeError on .strip().
    if not prompt or not prompt.strip():
        return None, "⚠️ **Error:** Por favor, ingresa un texto.", ""
    try:
        progress(0.2, desc="Cargando modelo...")
        tts_engine = load_engine(model_name)
        progress(0.5, desc="Procesando texto...")
        # Timestamped filename avoids collisions between successive runs.
        output_path = os.path.join("temp_audio", f"audio_{int(time.time())}.wav")
        progress(0.8, desc="Generando audio...")
        tts_engine.text_to_speech(prompt, output_path, noise_scale=0.75, noise_scale_w=0.8, length_scale=1)
        progress(1.0, desc="¡Completado!")
        success_msg = f"""
### ✅ Audio Generado con {model_name}
**📝 Texto:** {len(prompt)} caracteres procesados
**🎵 Listo para reproducir**
"""
        return output_path, success_msg, get_model_info(model_name)
    except Exception as e:
        # Surface the failure in the UI instead of crashing the app.
        error_msg = f"""
### ❌ Error Durante la Generación
**Modelo:** {model_name}
**Error:** {str(e)}
Verifica que el modelo y su configuración sean correctos.
"""
        print(f"❌ Ocurrió un error durante la inferencia: {e}")
        return None, error_msg, get_model_info(model_name)
def get_example_texts():
    """Return the fixed list of Spanish sample sentences used by the UI."""
    examples = [
        "Hola, soy F-VOICE, un sistema de síntesis de voz neuronal.",
        "La inteligencia artificial está transformando el mundo de la síntesis de voz.",
        "Buenos días, espero que tengas un excelente día.",
        "Este es un ejemplo de síntesis de voz con tecnología avanzada.",
        "¿Cómo estás? Me alegra poder hablar contigo.",
    ]
    return examples
# --- GRADIO USER INTERFACE ---
# Custom theme object defined in src/FVoiceTheme.py.
fvoice_theme = FVoiceTheme.FVoiceTheme()

# Custom CSS injected into the Blocks app: header/logo styling, glassmorphism
# containers, example/generate button effects, and footer layout.
css = """
#logo-header {
    display: flex;
    align-items: center;
    justify-content: space-between;
    padding: 25px;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    border-radius: 20px;
    margin-bottom: 30px;
    box-shadow: 0 8px 25px rgba(0,0,0,0.3);
}
#logo-header img {
    border-radius: 50%;
    box-shadow: 0 4px 15px rgba(0,0,0,0.4);
    transition: transform 0.3s ease;
}
#logo-header img:hover {
    transform: scale(1.05);
}
#F_VOICE_header {
    background: linear-gradient(45deg, #FFE3D8, #FFF);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    text-shadow: 3px 3px 6px rgba(0,0,0,0.4);
    font-weight: bold;
}
.main-container {
    background: rgba(255, 255, 255, 0.03);
    border-radius: 15px;
    padding: 25px;
    margin: 15px 0;
    border: 1px solid rgba(255, 255, 255, 0.1);
    backdrop-filter: blur(10px);
}
.model-info {
    background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
    color: white;
    padding: 15px;
    border-radius: 12px;
    margin: 10px 0;
    text-align: center;
    font-weight: 500;
}
.examples-row {
    display: flex;
    gap: 10px;
    margin: 15px 0;
    flex-wrap: wrap;
}
.example-btn {
    background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
    border: none;
    color: white;
    padding: 8px 15px;
    border-radius: 20px;
    cursor: pointer;
    font-size: 14px;
    transition: all 0.3s ease;
}
.example-btn:hover {
    transform: translateY(-2px);
    box-shadow: 0 5px 15px rgba(240, 147, 251, 0.4);
}
.generate-btn {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    border: none;
    color: white;
    padding: 15px 30px;
    border-radius: 25px;
    font-size: 18px;
    font-weight: bold;
    cursor: pointer;
    transition: all 0.3s ease;
    box-shadow: 0 5px 20px rgba(102, 126, 234, 0.4);
}
.generate-btn:hover {
    transform: translateY(-3px);
    box-shadow: 0 8px 25px rgba(102, 126, 234, 0.6);
}
a { text-decoration: none; }
.footer-info {
    text-align: center;
    margin-top: 30px;
    padding: 20px;
    background: rgba(255, 255, 255, 0.02);
    border-radius: 15px;
    border: 1px solid rgba(255, 255, 255, 0.05);
}
"""
# Expose src/assets as a static path so the logo is reachable via /file=.
gr.set_static_paths(paths=[Path.cwd().absolute() / "src/assets"])

# Snapshot of the models present at startup (the dropdown is not live-refreshed).
available_models = get_available_models()

with gr.Blocks(title="F-VOICE - Síntesis de Voz Neuronal", theme=fvoice_theme, css=css) as demo:
    # Header with logo, title, and a model-count badge built by concatenation.
    gr.HTML("""
    <div id="logo-header">
        <a href="https://github.com/SIAFI-UNAM/F-VOICE" target="_blank">
            <div style="display: flex; align-items: center; gap: 20px;">
                <img src='/file=src/assets/logo.jpeg' width='100' height='100' />
                <div>
                    <h1 id='F_VOICE_header' style='margin: 0; font-size: 55px;'>F-VOICE</h1>
                    <p style='margin: 5px 0 0 0; color: #FFE3D8; font-size: 20px; font-weight: 300;'>
                        Sistema de Síntesis de Voz Neuronal ✨
                    </p>
                </div>
            </div>
        </a>
        <div style="text-align: right; color: #FFE3D8; font-size: 16px;">
            <p style="margin: 5px 0;">🎤 <strong>""" + str(len(available_models)) + """</strong> modelos disponibles</p>
            <p style="margin: 5px 0;">🤖 Powered by <strong>AI</strong></p>
            <p style="margin: 5px 0;">⚡ Síntesis en <strong>tiempo real</strong></p>
        </div>
    </div>
    """)

    # Main description banner.
    gr.HTML("""
    <div class="main-container">
        <div style="text-align: center; padding: 20px;">
            <h2 style="color: #FFE3D8; margin-bottom: 15px;">
                🚀 Convierte texto en voz natural con IA
            </h2>
            <p style="font-size: 18px; line-height: 1.6; color: #E0E0E0; max-width: 800px; margin: 0 auto;">
                <strong>F-VOICE</strong> utiliza modelos neuronales de última generación para generar
                síntesis de voz realista y expresiva. Simplemente selecciona un modelo,
                escribe tu texto y obtén audio de alta calidad al instante.
            </p>
        </div>
    </div>
    """)

    # Main interface: text input on the left, model picker on the right.
    with gr.Row():
        with gr.Column(scale=3):
            prompt = gr.TextArea(
                placeholder="✍️ Escribe aquí el texto que quieres convertir a voz...\n\nEjemplo: Hola mundo, este es F-VOICE generando mi voz.",
                label="📝 Tu Texto",
                lines=5,
                max_lines=10
            )
            # Quick-example buttons (only the first three samples are shown).
            gr.HTML("""
            <div style="margin: 20px 0;">
                <p style="margin-bottom: 10px; font-weight: 500; color: #FFE3D8;">💡 Prueba estos ejemplos:</p>
            </div>
            """)
            with gr.Row():
                examples = get_example_texts()
                for i, example in enumerate(examples[:3]):
                    example_btn = gr.Button(f"Ejemplo {i+1}", size="sm", variant="secondary")
                    # Bind `example` as a default argument so each closure
                    # captures its own sentence (avoids late-binding bug).
                    example_btn.click(lambda x=example: x, outputs=prompt)
        with gr.Column(scale=1):
            model = gr.Dropdown(
                available_models,
                label="🎤 Modelo de Voz",
                value=available_models[0] if available_models else None,
                info="Selecciona la voz que prefieras"
            )
            # Styled info card for the currently selected model.
            model_info = gr.HTML(
                f'<div class="model-info">{get_model_info(available_models[0] if available_models else "")}</div>'
            )

    # Large primary generate button.
    with gr.Row():
        with gr.Column():
            btn = gr.Button("🎯 Generar Audio", variant="primary", size="lg", elem_classes=["generate-btn"])

    # Result area: status markdown plus the audio player.
    with gr.Row():
        with gr.Column():
            markdown_output = gr.Markdown("""
### 🎵 Tu audio aparecerá aquí
Selecciona un modelo, escribe tu texto y presiona **"Generar Audio"** para comenzar.
💡 **Consejo:** Los textos más largos y con buena puntuación dan mejores resultados.
""")
            audio = gr.Audio(
                value="assets/preview.wav" if os.path.exists("assets/preview.wav") else None,
                autoplay=False,
                label="🔊 Audio Generado",
                interactive=False,
                show_download_button=True
            )

    # Informational footer.
    gr.HTML("""
    <div class="footer-info">
        <div style="display: flex; justify-content: center; gap: 30px; align-items: center; flex-wrap: wrap;">
            <div style="text-align: center;">
                <p style="margin: 5px 0; font-size: 14px; color: #B0B0B0;">
                    🎨 <strong>Fácil de usar</strong><br>
                    Interfaz intuitiva
                </p>
            </div>
            <div style="text-align: center;">
                <p style="margin: 5px 0; font-size: 14px; color: #B0B0B0;">
                    ⚡ <strong>Rápido</strong><br>
                    Generación instantánea
                </p>
            </div>
            <div style="text-align: center;">
                <p style="margin: 5px 0; font-size: 14px; color: #B0B0B0;">
                    🎯 <strong>Preciso</strong><br>
                    Calidad profesional
                </p>
            </div>
            <div style="text-align: center;">
                <p style="margin: 5px 0; font-size: 14px; color: #B0B0B0;">
                    🔧 <strong>Versátil</strong><br>
                    Múltiples modelos
                </p>
            </div>
        </div>
        <p style="margin-top: 15px; font-size: 12px; color: #888; text-align: center;">
            Desarrollado con ❤️ usando tecnología de vanguardia en IA
        </p>
    </div>
    """)

    # --- Event wiring ---
    def update_model_info(model_name):
        """Re-render the model info card when the dropdown selection changes."""
        return f'<div class="model-info">{get_model_info(model_name)}</div>'

    model.change(fn=update_model_info, inputs=[model], outputs=[model_info])
    btn.click(
        fn=inference,
        inputs=[model, prompt],
        outputs=[audio, markdown_output, model_info]
    )
# Startup banner: report discovered models, then launch the Gradio app.
if __name__ == "__main__":
    banner = "=" * 60
    print(banner)
    print("🎤 F-VOICE - Sistema de Síntesis de Voz Neuronal")
    print(banner)
    print(f"📁 Modelos encontrados: {len(available_models)}")
    if not available_models:
        print("⚠️ ADVERTENCIA: No se encontraron modelos en './models/'")
    else:
        print("✅ Modelos disponibles:")
        # `model_file` instead of `model` to avoid shadowing the UI dropdown.
        for idx, model_file in enumerate(available_models, 1):
            print(f"   {idx}. {model_file}")
    print("\n🚀 Iniciando aplicación...")
    print(banner)
    demo.launch()