Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ from tensorflow import keras
|
|
5 |
import torch
|
6 |
from huggingface_hub import hf_hub_download
|
7 |
from speechbrain.inference.TTS import Tacotron2
|
|
|
8 |
|
9 |
# Cargar modelo Tacotron2
|
10 |
tacotron2 = Tacotron2.from_hparams(
|
@@ -13,15 +14,51 @@ tacotron2 = Tacotron2.from_hparams(
|
|
13 |
run_opts={"device": "cpu"}
|
14 |
)
|
15 |
|
16 |
-
#
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
# Función para convertir texto a audio
|
24 |
-
def text_to_audio(text):
|
25 |
# Crear un array vacío por defecto en caso de error
|
26 |
default_audio = np.zeros(8000, dtype=np.float32)
|
27 |
sample_rate = 8000 # Ajusta según la configuración de tu modelo
|
@@ -30,6 +67,16 @@ def text_to_audio(text):
|
|
30 |
return (sample_rate, default_audio)
|
31 |
|
32 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
# Convertir texto a mel-spectrograma con Tacotron2
|
34 |
mel_output, _, _ = tacotron2.encode_text(text)
|
35 |
mel = mel_output.detach().cpu().numpy().astype(np.float32)
|
@@ -91,16 +138,45 @@ def text_to_audio(text):
|
|
91 |
return (sample_rate, default_audio)
|
92 |
|
93 |
# Crear interfaz en Gradio
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
|
103 |
# Lanzar aplicación
|
104 |
if __name__ == "__main__":
|
105 |
-
|
106 |
-
|
|
|
|
|
|
|
|
5 |
import torch
|
6 |
from huggingface_hub import hf_hub_download
|
7 |
from speechbrain.inference.TTS import Tacotron2
|
8 |
+
import os
|
9 |
|
10 |
# Cargar modelo Tacotron2
|
11 |
tacotron2 = Tacotron2.from_hparams(
|
|
|
14 |
run_opts={"device": "cpu"}
|
15 |
)
|
16 |
|
17 |
+
# Diccionario para almacenar los modelos cargados
|
18 |
+
loaded_models = {}
|
19 |
+
|
20 |
+
# Modelos disponibles - define aquí las épocas que quieres incluir
|
21 |
+
available_models = {
|
22 |
+
"脡poca 100": "generator_epoch_100.keras",
|
23 |
+
"脡poca 1000": "generator_epoch_250.keras",
|
24 |
+
"脡poca 4200": "generator_epoch_500.keras",
|
25 |
+
"脡poca 4700": "generator_epoch_750.keras",
|
26 |
+
"脡poca 7700": "generator_epoch_1000.keras"
|
27 |
+
}
|
28 |
+
|
29 |
+
# Función para cargar un modelo específico
|
30 |
+
def load_generator_model(model_name):
|
31 |
+
if model_name in loaded_models:
|
32 |
+
return loaded_models[model_name]
|
33 |
+
|
34 |
+
try:
|
35 |
+
model_path = hf_hub_download(
|
36 |
+
repo_id="Bmo411/WGAN",
|
37 |
+
filename=model_name
|
38 |
+
)
|
39 |
+
model = keras.models.load_model(model_path, compile=False)
|
40 |
+
loaded_models[model_name] = model
|
41 |
+
print(f"Modelo {model_name} cargado correctamente")
|
42 |
+
return model
|
43 |
+
except Exception as e:
|
44 |
+
print(f"Error al cargar el modelo {model_name}: {e}")
|
45 |
+
# Si falla la carga, intentamos usar el modelo de la época 1000 como fallback
|
46 |
+
try:
|
47 |
+
fallback_model = "generator_epoch_1000.keras"
|
48 |
+
model_path = hf_hub_download(
|
49 |
+
repo_id="Bmo411/WGAN",
|
50 |
+
filename=fallback_model
|
51 |
+
)
|
52 |
+
model = keras.models.load_model(model_path, compile=False)
|
53 |
+
loaded_models[model_name] = model # Guardamos con el nombre original para evitar recargar
|
54 |
+
print(f"Usando modelo fallback {fallback_model}")
|
55 |
+
return model
|
56 |
+
except:
|
57 |
+
print("Error cr铆tico al cargar modelos. No hay modelos disponibles.")
|
58 |
+
return None
|
59 |
|
60 |
# Función para convertir texto a audio
|
61 |
+
def text_to_audio(text, model_epoch):
|
62 |
# Crear un array vacío por defecto en caso de error
|
63 |
default_audio = np.zeros(8000, dtype=np.float32)
|
64 |
sample_rate = 8000 # Ajusta según la configuración de tu modelo
|
|
|
67 |
return (sample_rate, default_audio)
|
68 |
|
69 |
try:
|
70 |
+
# Obtener el nombre del archivo del modelo seleccionado
|
71 |
+
model_filename = available_models[model_epoch]
|
72 |
+
|
73 |
+
# Cargar el modelo generador correspondiente
|
74 |
+
generator = load_generator_model(model_filename)
|
75 |
+
|
76 |
+
if generator is None:
|
77 |
+
print("No se pudo cargar el generador")
|
78 |
+
return (sample_rate, default_audio)
|
79 |
+
|
80 |
# Convertir texto a mel-spectrograma con Tacotron2
|
81 |
mel_output, _, _ = tacotron2.encode_text(text)
|
82 |
mel = mel_output.detach().cpu().numpy().astype(np.float32)
|
|
|
138 |
return (sample_rate, default_audio)
|
139 |
|
140 |
# Crear interfaz en Gradio
|
141 |
+
with gr.Blocks(title="Demo de TTS con Tacotron2 + Generador") as interface:
|
142 |
+
gr.Markdown("# Demo de TTS con Tacotron2 + Generador")
|
143 |
+
gr.Markdown("Convierte texto en audio usando Tacotron2 + modelo Generator entrenado en diferentes 茅pocas.")
|
144 |
+
|
145 |
+
with gr.Row():
|
146 |
+
with gr.Column(scale=3):
|
147 |
+
text_input = gr.Textbox(lines=2, placeholder="Escribe nine-", label="Texto a convertir")
|
148 |
+
with gr.Column(scale=1):
|
149 |
+
model_selection = gr.Dropdown(
|
150 |
+
choices=list(available_models.keys()),
|
151 |
+
value="脡poca 1000",
|
152 |
+
label="Selecciona la 茅poca del modelo"
|
153 |
+
)
|
154 |
+
|
155 |
+
generate_btn = gr.Button("Generar Audio", variant="primary")
|
156 |
+
|
157 |
+
audio_output = gr.Audio(label="Audio generado")
|
158 |
+
|
159 |
+
# Configurar ejemplos
|
160 |
+
examples = gr.Examples(
|
161 |
+
examples=[
|
162 |
+
["nine", "脡poca 100"],
|
163 |
+
["nine", "脡poca 1000"],
|
164 |
+
["nine", "脡poca 4200"]
|
165 |
+
],
|
166 |
+
inputs=[text_input, model_selection],
|
167 |
+
outputs=audio_output
|
168 |
+
)
|
169 |
+
|
170 |
+
# Conectar botón a la función
|
171 |
+
generate_btn.click(fn=text_to_audio, inputs=[text_input, model_selection], outputs=audio_output)
|
172 |
+
|
173 |
+
# También permitir enviar con Enter desde el cuadro de texto
|
174 |
+
text_input.submit(fn=text_to_audio, inputs=[text_input, model_selection], outputs=audio_output)
|
175 |
|
176 |
# Lanzar aplicación
|
177 |
if __name__ == "__main__":
|
178 |
+
# Precargamos el modelo de la época 1000 para tenerlo disponible inmediatamente
|
179 |
+
load_generator_model(available_models["脡poca 1000"])
|
180 |
+
|
181 |
+
# Lanzamos la interfaz
|
182 |
+
interface.launch(debug=True)
|