Spaces:

ovieyra21
/

tts-fair-spa

Runtime error

App Files Files Community

tts-fair-spa / app.py

ovieyra21

Update app.py

6e2f126 verified 6 months ago

raw

history blame

2.21 kB

	import os
	import gradio as gr
	import torch
	from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
	from transformers import AutoProcessor, AutoModelForTextToSpectrogram
	from huggingface_hub import login
	from transformers import pipeline


	# Read the Hugging Face access token from the environment.
	hf_token = os.getenv("HF_TOKEN")
	# Reject both an unset and an empty variable so that login() is never
	# called with an unusable token.
	if not hf_token:
	    raise ValueError("Debe proporcionar un token de Hugging Face en las variables de entorno.")

	# Authenticate against the Hugging Face Hub with the token.
	login(hf_token)

	pipe = pipeline("text-to-speech", model="ovieyra21/es_speecht5_tts_mabama")


	# Try to load the model.

	# Load the processor and the model directly through transformers.
	# NOTE(review): `model` is rebound to the fairseq model in the try block
	# below, so this transformers instance is discarded — confirm it is needed.
	processor = AutoProcessor.from_pretrained("ovieyra21/es_speecht5_tts_mabama")
	model = AutoModelForTextToSpectrogram.from_pretrained("ovieyra21/es_speecht5_tts_mabama")


	# Load the same checkpoint through fairseq; `task`, `cfg` and `model` are
	# the globals that text_to_speech() reads.
	# NOTE(review): the checkpoint name suggests a SpeechT5 (transformers)
	# model — confirm that the fairseq hub loader can actually load it.
	try:
	    models, cfg, task = load_model_ensemble_and_task_from_hf_hub("ovieyra21/es_speecht5_tts_mabama")
	    if not models:
	        raise RuntimeError("No se pudo cargar el modelo. Asegúrate de que el nombre del modelo es correcto y que está disponible en Hugging Face Hub.")
	    model = models[0]
	except Exception as e:
	    # Chain explicitly so the original failure is kept as __cause__ and
	    # the traceback shows where loading really broke.
	    raise RuntimeError(f"Error al cargar el modelo: {e}") from e

	# Función para generar la salida de texto a voz
	def text_to_speech(text):
	    """Synthesize speech for ``text`` with the module-level fairseq model.

	    The text is encoded with ``task.source_dictionary``, given a leading
	    dimension, and passed to ``task.inference_step`` together with a
	    generator built from ``cfg.generation``.

	    Args:
	        text: Input text to convert.

	    Returns:
	        ``result[0][0]`` of the inference output converted with
	        ``.numpy()``. If any exception is raised, a string starting with
	        "Error en la generación de audio: " followed by the exception
	        text is returned instead.
	    """
	    # NOTE(review): failures come back as a plain string although the
	    # Gradio interface declares an audio output — confirm this is intended.
	    try:
	        # Text preprocessing: encode the text with the task dictionary.
	        encoded = task.source_dictionary.encode_line(text, add_if_not_exist=False)

	        # Generate the audio output with gradient tracking disabled.
	        with torch.no_grad():
	            src_tokens = encoded.unsqueeze(0).long()
	            batch = {"net_input": {"src_tokens": src_tokens}}
	            synthesizer = task.build_generator([model], cfg.generation)
	            result = task.inference_step(synthesizer, [model], batch)

	        waveform = result[0][0]
	        return waveform.numpy()
	    except Exception as err:
	        return f"Error en la generación de audio: {err}"

	# Build the Gradio interface: a two-line text box in, generated audio out.
	# The top-level gr.Textbox / gr.Audio components are used instead of the
	# legacy gr.inputs / gr.outputs namespaces, which were deprecated in
	# Gradio 3 and removed in Gradio 4.
	iface = gr.Interface(
	    fn=text_to_speech,
	    inputs=gr.Textbox(lines=2, placeholder="Ingrese el texto aquí..."),
	    outputs=gr.Audio(type="numpy", label="Output Audio"),
	    title="Conversor de Texto a Voz",
	    description="Ingrese texto para convertirlo a voz utilizando el modelo speecht5_tts_mabama_es.",
	)

	# Start the web app only when this file is run as a script.
	if __name__ == "__main__":
	    iface.launch()