Spaces:
Sleeping
Sleeping
import gradio as gr | |
import requests | |
from typing import Union | |
from pydub import AudioSegment | |
from io import BytesIO | |
import os | |
from utils import CheckLanguageIsSpanish | |
# HTML header passed to gr.HTML as the first component of the layout:
# the Sintética logo followed by the product name.
title = """
<div style="display: flex; align-items: center;">
<img src="https://i.postimg.cc/NGVZL3bg/logo-sintetica.png" alt="sintetica_logo" width="40" height="40" style="margin-right: 10px;">
<h1 style="font-size: 24px;">Sintética: Text-to-Speech (TTS)</h1>
</div>
"""
# Footer blurb passed to gr.Markdown as the last component of the layout:
# a short credit line plus links to the Sintética and MecanTronic sites.
description = """
<div style="font-size: 16px;">
Sintética: Sistema text-to-speech (TTS) desarrollado por MecanTronic S.A.
Para más información:
</div>
<div style="display: flex; flex-direction: column;">
<a href="https://sintetica.com.ar/" style="display: flex; align-items: center; margin-bottom: 5px; font-size:16px;">
<img src="https://i.postimg.cc/NGVZL3bg/logo-sintetica.png" alt="sintetica_logo" width="20" height="20" style="margin-right: 10px">
Sintética
</a>
<a href="https://mecantronic.com.ar/" style="display: flex; align-items: center; font-size:16px;">
<img src="https://i.postimg.cc/X7Cm5sD9/logo-MEC.png" alt="mecantronic_logo" width="20" height="20" style="margin-right: 10px;">
MecanTronic
</a>
</div>
"""
# Voice name shown in the "Hablante" dropdown -> speaker identifier sent to
# the inference service. Insertion order matters: the first key is used as
# the dropdown's default selection.
speaker_mapping = {
    "Micaela": "arf_00295",
    "Florencia": "arf_02121",
    "Rocío": "arf_02484",
    "Pedro": "arm_03397",
    "Pablo": "arm_06136",
    "Juan": "arm_05223",
}
def process(text: str, speaker: str, speed: Union[int, float]):
    """Synthesize ``text`` with the selected voice and return the audio for the UI.

    Args:
        text: Text to synthesize; it is validated with ``check_errors`` first.
        speaker: Voice name as shown in the UI; must be a key of
            ``speaker_mapping``.
        speed: Playback speed multiplier; must be greater than zero. The
            request carries its reciprocal as ``length_scale_factor``.

    Returns:
        The value produced by ``play_audio`` for the synthesized audio, or
        ``None`` when any input is rejected (a ``gr.Warning`` is shown
        instead).
    """
    # Validate the text before anything else so that a bad input never
    # reaches the speaker lookup or the network call.
    if check_errors(text):
        return None

    # .get() instead of [] so an unexpected name yields a UI warning rather
    # than an unhandled KeyError.
    speaker_id = speaker_mapping.get(speaker)
    if speaker_id is None:
        gr.Warning(f'Hablante desconocido: {speaker}.')
        return None

    # Guards the 1/speed division below against zero, negative or missing values.
    if not speed or speed <= 0:
        gr.Warning('La velocidad debe ser mayor que cero.')
        return None

    request = {
        "text": text,
        "speaker": speaker_id,
        "pitch_scale_factor": 0.95,
        "length_scale_factor": 1 / speed,
        "output_samplerate": 44100,
    }
    # post_request returns the response body as an in-memory buffer, not a path.
    audio_bytes = post_request(request)
    return play_audio(audio_bytes)
def post_request(request: dict, timeout: float = 120.0):
    """POST ``request`` as JSON to the inference service and return the response body.

    The service URL is read from the ``URL_INFERENCE`` environment variable.

    Args:
        request: JSON-serializable payload to send.
        timeout: Seconds to wait for the service before giving up. Without a
            timeout ``requests`` may block indefinitely.
            NOTE(review): 120 s is a conservative guess; tune it to the
            service's real latency.

    Returns:
        A ``BytesIO`` wrapping the raw response content.

    Raises:
        RuntimeError: If ``URL_INFERENCE`` is not set or is empty.
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    url = os.environ.get("URL_INFERENCE")
    if not url:
        # Fail with an actionable message instead of requests' "Invalid URL 'None'".
        raise RuntimeError(
            "The URL_INFERENCE environment variable is not set; "
            "cannot reach the inference service."
        )

    headers = {'Content-Type': 'application/json'}
    response = requests.post(url, json=request, headers=headers, timeout=timeout)
    # Surface HTTP errors here rather than handing an error body to the audio decoder.
    response.raise_for_status()
    return BytesIO(response.content)
def play_audio(audio_bytes: BytesIO):
    """Decode the received audio and wrap it, re-encoded as WAV, in a ``gr.Audio``.

    Args:
        audio_bytes: In-memory buffer with the audio returned by the service,
            in any container ``AudioSegment.from_file`` can read.

    Returns:
        A ``gr.Audio`` component whose value is the WAV-encoded bytes.
    """
    audio = AudioSegment.from_file(audio_bytes)
    # export() without a target writes to a temporary file and returns the
    # open handle; read it inside a context manager so it is always closed.
    with audio.export(format="wav") as wav_file:
        wav_data = wav_file.read()
    return gr.Audio(wav_data)
def check_errors(text, max_length: int = 330):
    """Validate the text to synthesize, showing a ``gr.Warning`` per problem found.

    Args:
        text: Text entered by the user.
        max_length: Maximum number of characters accepted (330 by default,
            matching the limit advertised in the text box label).

    Returns:
        ``True`` if at least one problem was found, ``False`` otherwise.
    """
    # Nothing to synthesize: stop here so empty input never reaches the
    # language detector or the inference service.
    if not text or not text.strip():
        gr.Warning('El texto está vacío.')
        return True

    error = False
    if len(text) > max_length:
        gr.Warning(f'La longitud del texto ({len(text)} caracteres) sobrepasa el máximo permitido.')
        error = True
    # NOTE(review): the warning fires when detect_english() returns a falsy
    # value; confirm the helper's return convention in utils, since the method
    # name suggests the opposite polarity.
    if not CheckLanguageIsSpanish().detect_english(text):
        gr.Warning('El texto está en Ingles o posee oraciones en dicho idioma.')
        error = True
    return error
# Build the Gradio interface: header, language selector, TTS options
# (text, voice, speed), action buttons, audio output and footer.
# NOTE(review): the original indentation was lost; the nesting below is a
# reconstruction - confirm which components belong inside the tab.
demo = gr.Blocks()
with demo:
    gr.HTML(title)

    # Only one language is offered; the selector is not passed to process().
    language_choices = ["Español"]
    language = gr.Radio(
        choices=language_choices,
        value=language_choices[0],
        label="Idioma",
        info="Próximamente nuevos idiomas disponibles",
    )

    # Materialize the voice names once: Gradio documents `choices` as a list,
    # and the same list provides the default selection.
    speaker_names = list(speaker_mapping)

    with gr.Tabs():
        with gr.TabItem("Opciones del TTS"):
            input_text = gr.Textbox(
                label="330 caracteres como máximo",
                type="text",
                lines=3,
                placeholder="Ingresa aquí el texto",
            )
            input_speaker = gr.Dropdown(
                label="Hablante",
                choices=speaker_names,
                type="value",
                value=speaker_names[0],
            )
            input_speed = gr.Slider(
                minimum=0.5,
                maximum=1.5,
                value=1,
                step=0.05,
                label="Velocidad de reproducción",
            )
            with gr.Row():
                input_button = gr.Button("Procesar", variant="primary")
                # Clears only the text box; voice and speed keep their values.
                clear_button = gr.ClearButton([input_text], value="Limpiar")

            # Fixed-height spacer between the buttons and the audio player.
            gr.HTML('<div style="height: 15px;"></div>')
            output_audio = gr.Audio(
                type="filepath",
                label="Audio sintetizado",
            )

            # Run the synthesis pipeline when "Procesar" is clicked.
            input_button.click(
                process,
                inputs=[
                    input_text,
                    input_speaker,
                    input_speed,
                ],
                outputs=[
                    output_audio,
                ],
            )

    gr.Markdown(description)

if __name__ == "__main__":
    demo.launch()