# text-to-speech / app.py
# DHEIVER's picture
# Update app.py
# 69f50e2 verified
# raw
# history blame
# 4.17 kB
import logging
import os
import time
import uuid
import gradio as gr
import soundfile as sf
from model import get_pretrained_model, language_to_models
# Page heading; rendered with gr.Markdown as the first element of the UI.
title = "# Next-gen Kaldi: Text-to-speech (TTS)"

# Footer text; rendered with gr.Markdown as the last element of the UI.
# Paragraphs are separated by blank lines on purpose: without them Markdown
# folds the sentence that follows the bullet into the list item and merges
# the remaining sentences into a single paragraph.
description = """
This space shows how to convert text to speech with Next-gen Kaldi.

It is running on CPU within a docker container provided by Hugging Face.

See more information by visiting the following links:

- <https://github.com/k2-fsa/sherpa-onnx>

If you want to deploy it locally, please see
<https://k2-fsa.github.io/sherpa/>

If you want to use Android APKs, please see
<https://k2-fsa.github.io/sherpa/onnx/tts/apk.html>

If you want to use Android text-to-speech engine APKs, please see
<https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html>

If you want to download an all-in-one exe for Windows, please see
<https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models>
"""
# Stylesheet handed to gr.Blocks. One literal per rule; adjacent literals are
# concatenated by the parser, and the leading/trailing newlines are kept.
css = (
    "\n"
    ".result {display:flex;flex-direction:column}\n"
    ".result_item {padding:15px;margin-bottom:8px;border-radius:15px;width:100%}\n"
    ".result_item_success {background-color:mediumaquamarine;color:white;align-self:start}\n"
    ".result_item_error {background-color:#ff7070;color:white;align-self:start}\n"
)
# Portuguese is the only language offered by this simplified app.
language_choices = ["Portuguese"]

# Sample rows for gr.Examples. Column order follows the `inputs` list given
# to gr.Examples: language, model, input text, speaker ID, speed.
examples = [
    [
        "Portuguese",
        "csukuangfj/vits-mms-por",
        "Eu desejo uma versão simplificada para português.",
        0,
        1.0,
    ],
]
def _build_html_output(message: str, style: str = "result_item_success") -> str:
    """Wrap *message* in the result markup styled by the page stylesheet.

    *style* is one of the ``result_item_*`` classes defined in ``css``
    (``result_item_success`` or ``result_item_error``).
    """
    return (
        "<div class='result'>"
        f"<div class='result_item {style}'>{message}</div>"
        "</div>"
    )


def process(language: str, repo_id: str, text: str, sid, speed: float):
    """Synthesize *text* and return ``(wav_path, info_html)`` for the UI.

    This is the callback wired to the Submit button and to gr.Examples; its
    parameters follow the order of their ``inputs`` lists and its return
    value follows their ``outputs`` lists (audio, then info HTML).

    Args:
        language: Selected language. Unused; accepted because the UI passes it.
        repo_id: Model chosen in the dropdown.
        text: Text to synthesize.
        sid: Speaker ID, a string from the textbox or an int from the examples.
        speed: Value of the speed slider.

    Returns:
        A ``(path, html)`` pair. For invalid input the path is ``None`` and
        the HTML carries an error message.

    Raises:
        ValueError: If the model produced no audio samples.
    """
    if not text or not text.strip():
        return None, _build_html_output(
            "Please input your text", "result_item_error"
        )

    try:
        speaker_id = int(sid)
    except (TypeError, ValueError):
        return None, _build_html_output(
            "Speaker ID must be an integer", "result_item_error"
        )

    logging.info("Input text: %s. sid: %s, speed: %s", text, speaker_id, speed)

    # NOTE(review): assumes get_pretrained_model(repo_id, speed) returns a
    # sherpa-onnx TTS object whose generate(text, sid=...) result exposes
    # `.samples` and `.sample_rate` -- confirm against model.py.
    tts = get_pretrained_model(repo_id, speed)

    started = time.time()
    audio = tts.generate(text, sid=speaker_id)
    elapsed_seconds = time.time() - started

    if len(audio.samples) == 0:
        raise ValueError(
            "Error in generating audios. Please read previous error messages."
        )

    duration = len(audio.samples) / audio.sample_rate
    rtf = elapsed_seconds / duration
    info = (
        f"Wave duration  : {duration:.3f} s <br/>"
        f"Processing time: {elapsed_seconds:.3f} s <br/>"
        f"RTF: {elapsed_seconds:.3f}/{duration:.3f} = {rtf:.3f} <br/>"
    )
    logging.info(info)

    # A random file name keeps concurrent requests from overwriting each other.
    wav_path = f"{uuid.uuid4()}.wav"
    sf.write(
        wav_path,
        audio.samples,
        samplerate=audio.sample_rate,
        subtype="PCM_16",
    )
    return wav_path, _build_html_output(info)


demo = gr.Blocks(css=css)

with demo:
    gr.Markdown(title)

    # Use Radio instead of Dropdown for language choice
    language_radio = gr.Radio(
        label="Language",
        choices=language_choices,
        value=language_choices[0],
    )

    # Initialize model_dropdown with Portuguese models
    model_dropdown = gr.Dropdown(
        choices=language_to_models["Portuguese"],
        label="Select a model",
        value=language_to_models["Portuguese"][0],
    )

    # No need to update model_dropdown for a single language

    with gr.Tabs():
        with gr.TabItem("Please input your text"):
            input_text = gr.Textbox(
                label="Input text",
                info="Your text",
                lines=3,
                placeholder="Please input your text here",
            )

            input_sid = gr.Textbox(
                label="Speaker ID",
                info="Speaker ID",
                lines=1,
                max_lines=1,
                value="0",
                placeholder="Speaker ID. Valid only for mult-speaker model",
            )

            input_speed = gr.Slider(
                minimum=0.1,
                maximum=10,
                value=1,
                step=0.1,
                label="Speed (larger->faster; smaller->slower)",
            )

            input_button = gr.Button("Submit")

            output_audio = gr.Audio(label="Output")

            output_info = gr.HTML(label="Info")

            gr.Examples(
                examples=examples,
                fn=process,
                inputs=[
                    language_radio,
                    model_dropdown,
                    input_text,
                    input_sid,
                    input_speed,
                ],
                outputs=[
                    output_audio,
                    output_info,
                ],
            )

        # Event listeners must be registered inside the Blocks context.
        input_button.click(
            process,
            inputs=[
                language_radio,
                model_dropdown,
                input_text,
                input_sid,
                input_speed,
            ],
            outputs=[
                output_audio,
                output_info,
            ],
        )

    gr.Markdown(description)
def download_espeak_ng_data():
    """Download the espeak-ng data archive into /tmp and unpack it there.

    The work is done by a short shell script run through ``os.system``.
    This is best effort: a failure is logged as a warning and not raised,
    so the caller continues either way.
    """
    # `set -e` stops the script at the first failing command, so tar is not
    # run when the cd or the download failed.
    status = os.system(
        """
        set -e
        cd /tmp
        wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
        tar xf espeak-ng-data.tar.bz2
        """
    )
    if status != 0:
        # A named logger is used instead of the module-level logging.warning()
        # helper because the helper installs a default root handler when none
        # exists, which would turn a later logging.basicConfig() into a no-op.
        logging.getLogger(__name__).warning(
            "Downloading or extracting espeak-ng-data failed (status %s)",
            status,
        )
if __name__ == "__main__":
    # Configure logging first, then fetch the espeak-ng data and start the UI.
    logging.basicConfig(
        format="%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s",
        level=logging.INFO,
    )
    download_espeak_ng_data()
    demo.launch()