gnosticdev committed
Commit f26ba3a (verified)
1 Parent(s): 5f972c1

Update app.py

Files changed (1)
  1. app.py +24 -32
app.py CHANGED
@@ -3,6 +3,7 @@ import torch
from huggingface_hub import hf_hub_download
from torch.nn import Linear, Sequential, Tanh
import soundfile as sf
+import edge_tts
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from keybert import KeyBERT
from moviepy.editor import (
@@ -55,32 +56,28 @@ def load_json_from_path(path):
# The full ToucanTTS model load is done via hf_hub_download, so the complete code is not needed here.
# The ControllableInterface class is an adaptation of the original.

-class ToucanTTSInterface:
-    def __init__(self, gpu_id="cpu"):
-        self.device = torch.device("cpu") if gpu_id == "cpu" else torch.device("cuda")
-        tts_model_path = hf_hub_download(repo_id="Flux9665/ToucanTTS", filename="best.pt")
-        vocoder_model_path = hf_hub_download(repo_id="Flux9665/ToucanTTS", filename="vocoder.pt")
-        # Import the class here to avoid circular dependency problems
-        from TrainingInterfaces.Text_to_Spectrogram.ToucanTTS.ToucanTTS import ToucanTTS as ToucanTTS_Model
-        self.tts_model = ToucanTTS_Model()
-        self.tts_model.load_state_dict(torch.load(tts_model_path, map_location=self.device)["model"])
-        self.vocoder_model = torch.jit.load(vocoder_model_path).to(self.device).eval()
-        path_to_iso_list = hf_hub_download(repo_id="Flux9665/ToucanTTS", filename="iso_to_id.json")
-        self.iso_to_id = load_json_from_path(path_to_iso_list)
-        self.tts_model.to(self.device)
-
-    def read(self, text, language="spa", accent="spa"):
-        with torch.inference_mode():
-            style_embedding = self.tts_model.style_embedding_function(torch.randn([1, 1, 192]).to(self.device)).squeeze()
-            output_wave, output_sr, _ = self.tts_model.read(
-                text=text,
-                style_embedding=style_embedding,
-                language_id=self.iso_to_id[language],
-                accent_id=self.iso_to_id[accent],
-                vocoder=self.vocoder_model,
-                device=self.device
-            )
-            return output_sr, output_wave.cpu().numpy()
+class EdgeTTSInterface:
+    def __init__(self, voice="es-ES-AlvaroNeural"):  # can be switched to "es-ES-ElviraNeural"
+        self.voice = voice
+
+    def read(self, text, language="es", accent=None):
+        tmp_path = tempfile.mktemp(suffix=".wav")
+
+        async def _synth():
+            communicate = edge_tts.Communicate(text, self.voice)
+            await communicate.save(tmp_path)
+
+        asyncio.run(_synth())
+
+        # load the wav into numpy
+        wav, sr = sf.read(tmp_path, dtype="float32")
+        return sr, wav
+
+def get_tts_interface():
+    global tts_interface
+    if tts_interface is None:
+        tts_interface = EdgeTTSInterface()
+    return tts_interface

# ------------------- Configuration & Globals -------------------
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
@@ -326,12 +323,7 @@ def worker(task_id: str, mode: str, topic: str, user_script: str, music: str | None
    try:
        text = topic if mode == "Generar Guion con IA" else user_script
        # Since ToucanTTS is not fully integrated, we will simulate an error for now.
-        # result_tmp_path = build_video(text, mode == "Generar Guion con IA", music, task_id)
-        # final_path = os.path.join(RESULTS_DIR, f"{task_id}.mp4")
-        # shutil.copy2(result_tmp_path, final_path)
-        # TASKS[task_id].update({"status": "done", "result": final_path})
-        # shutil.rmtree(os.path.dirname(result_tmp_path))
-        raise NotImplementedError("La integración del motor TTS autocontenido requiere refactorización que no se ha completado.")
+
    except Exception as e:
        logger.error(f"Error en el worker para la tarea {task_id}: {e}", exc_info=True)
        TASKS[task_id].update({"status": "error", "error": str(e)})
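
For context, a hedged usage sketch, not part of this commit: how the worker path might call the new interface once its body is filled back in. get_tts_interface, text, task_id, os, sf and RESULTS_DIR are names shown or referenced in the diff; the narration filename is illustrative.

        # Hypothetical wiring, not in this commit: synthesize the narration with the new
        # EdgeTTSInterface and persist it next to the task's results.
        sr, wav = get_tts_interface().read(text, language="es")
        narration_path = os.path.join(RESULTS_DIR, f"{task_id}_narration.wav")  # illustrative filename
        sf.write(narration_path, wav, sr)  # write the float32 buffer back to a WAV file

Two caveats worth keeping in mind with this pattern: asyncio.run() inside EdgeTTSInterface.read() will raise if it is called from a thread that already has a running event loop, and edge-tts streams MP3 by default, so reading the temporary file back with soundfile relies on a libsndfile build with MPEG support.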