gnosticdev committed
Commit f26ba3a (verified)
1 Parent(s): 5f972c1

Update app.py

Files changed (1)
  1. app.py +24 -32
app.py CHANGED
@@ -3,6 +3,7 @@ import torch
from huggingface_hub import hf_hub_download
from torch.nn import Linear, Sequential, Tanh
import soundfile as sf
+import edge_tts
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from keybert import KeyBERT
from moviepy.editor import (
@@ -55,32 +56,28 @@ def load_json_from_path(path):
# The full ToucanTTS model load is done via hf_hub_download, so the complete code is not needed here.
# The ControllableInterface class is an adaptation of the original.

-class ToucanTTSInterface:
-    def __init__(self, gpu_id="cpu"):
-        self.device = torch.device("cpu") if gpu_id == "cpu" else torch.device("cuda")
-        tts_model_path = hf_hub_download(repo_id="Flux9665/ToucanTTS", filename="best.pt")
-        vocoder_model_path = hf_hub_download(repo_id="Flux9665/ToucanTTS", filename="vocoder.pt")
-        # Import the class here to avoid circular dependency problems
-        from TrainingInterfaces.Text_to_Spectrogram.ToucanTTS.ToucanTTS import ToucanTTS as ToucanTTS_Model
-        self.tts_model = ToucanTTS_Model()
-        self.tts_model.load_state_dict(torch.load(tts_model_path, map_location=self.device)["model"])
-        self.vocoder_model = torch.jit.load(vocoder_model_path).to(self.device).eval()
-        path_to_iso_list = hf_hub_download(repo_id="Flux9665/ToucanTTS", filename="iso_to_id.json")
-        self.iso_to_id = load_json_from_path(path_to_iso_list)
-        self.tts_model.to(self.device)
-
-    def read(self, text, language="spa", accent="spa"):
-        with torch.inference_mode():
-            style_embedding = self.tts_model.style_embedding_function(torch.randn([1, 1, 192]).to(self.device)).squeeze()
-            output_wave, output_sr, _ = self.tts_model.read(
-                text=text,
-                style_embedding=style_embedding,
-                language_id=self.iso_to_id[language],
-                accent_id=self.iso_to_id[accent],
-                vocoder=self.vocoder_model,
-                device=self.device
-            )
-            return output_sr, output_wave.cpu().numpy()
+class EdgeTTSInterface:
+    def __init__(self, voice="es-ES-AlvaroNeural"):  # can be switched to "es-ES-ElviraNeural"
+        self.voice = voice
+
+    def read(self, text, language="es", accent=None):
+        tmp_path = tempfile.mktemp(suffix=".wav")
+
+        async def _synth():
+            communicate = edge_tts.Communicate(text, self.voice)
+            await communicate.save(tmp_path)
+
+        asyncio.run(_synth())
+
+        # load the wav into numpy
+        wav, sr = sf.read(tmp_path, dtype="float32")
+        return sr, wav
+
+def get_tts_interface():
+    global tts_interface
+    if tts_interface is None:
+        tts_interface = EdgeTTSInterface()
+    return tts_interface

# ------------------- Configuration & Globals -------------------
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
@@ -326,12 +323,7 @@ def worker(task_id: str, mode: str, topic: str, user_script: str, music: str | None
    try:
        text = topic if mode == "Generar Guion con IA" else user_script
        # Since ToucanTTS is not fully integrated, we will simulate an error for now.
-        # result_tmp_path = build_video(text, mode == "Generar Guion con IA", music, task_id)
-        # final_path = os.path.join(RESULTS_DIR, f"{task_id}.mp4")
-        # shutil.copy2(result_tmp_path, final_path)
-        # TASKS[task_id].update({"status": "done", "result": final_path})
-        # shutil.rmtree(os.path.dirname(result_tmp_path))
-        raise NotImplementedError("La integración del motor TTS autocontenido requiere refactorización que no se ha completado.")
+
    except Exception as e:
        logger.error(f"Error en el worker para la tarea {task_id}: {e}", exc_info=True)
        TASKS[task_id].update({"status": "error", "error": str(e)})
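
For context, a hedged usage sketch, not part of this commit: how the worker path might call the new interface once its body is filled back in. get_tts_interface, text, task_id, os, sf and RESULTS_DIR are names shown or referenced in the diff; the narration filename is illustrative.

        # Hypothetical wiring, not in this commit: synthesize the narration with the new
        # EdgeTTSInterface and persist it next to the task's results.
        sr, wav = get_tts_interface().read(text, language="es")
        narration_path = os.path.join(RESULTS_DIR, f"{task_id}_narration.wav")  # illustrative filename
        sf.write(narration_path, wav, sr)  # write the float32 buffer back to a WAV file

Two caveats worth keeping in mind with this pattern: asyncio.run() inside EdgeTTSInterface.read() will raise if it is called from a thread that already has a running event loop, and edge-tts streams MP3 by default, so reading the temporary file back with soundfile relies on a libsndfile build with MPEG support.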