# vits-kbd-male / app.py
# anzorq's picture
# Update app.py
# 27e87f7
# raw
# history blame
# 2.18 kB
import functools
import os
import tempfile
import threading

import gradio as gr
from TTS.utils.download import download_url
from TTS.utils.synthesizer import Synthesizer
# Upper bound on the number of input characters kept for synthesis.
MAX_TXT_LEN = 800

# Template for the local model directory; "{}" receives the voice gender.
BASE_DIR = "kbd-vits-tts-{}"

# Hugging Face "resolve" prefixes for the two voice repositories.
_MALE_REPO = "https://huggingface.co/anzorq/kbd-vits-tts-male/resolve/main"
_FEMALE_REPO = "https://huggingface.co/anzorq/kbd-vits-tts-female/resolve/main"

# Checkpoint and config locations for each voice.
MALE_MODEL_URL = f"{_MALE_REPO}/checkpoint_56000.pth"
MALE_CONFIG_URL = f"{_MALE_REPO}/config_35000.json"
FEMALE_MODEL_URL = f"{_FEMALE_REPO}/best_model_56351.pth"
FEMALE_CONFIG_URL = f"{_FEMALE_REPO}/config.json"
def download_model_and_config(gender):
    """Make sure the checkpoint and config for one voice are present locally.

    Args:
        gender: "male" selects the male voice URLs; any other value selects
            the female ones. The value is also substituted into ``BASE_DIR``
            to form the target directory name.

    Returns:
        The directory that holds ``model.pth`` and ``config.json``.
    """
    dir_path = BASE_DIR.format(gender)
    os.makedirs(dir_path, exist_ok=True)

    if gender == "male":
        model_url, config_url = MALE_MODEL_URL, MALE_CONFIG_URL
    else:
        model_url, config_url = FEMALE_MODEL_URL, FEMALE_CONFIG_URL

    # Decide per file rather than per directory: a directory left behind by an
    # interrupted earlier run must not hide a file that never arrived.
    # NOTE(review): a file that exists but is truncated is still treated as
    # present; no size or hash verification is done here.
    # NOTE(review): download_url is believed to refuse to overwrite an
    # existing file, hence the explicit existence check — confirm against the
    # installed TTS version.
    for filename, url in (("model.pth", model_url), ("config.json", config_url)):
        if not os.path.exists(os.path.join(dir_path, filename)):
            download_url(url, dir_path, filename)

    return dir_path
# Fetch both voices when the module is loaded, male first, then female.
for _voice_gender in ("male", "female"):
    download_model_and_config(_voice_gender)
# One synthesizer instance is shared by every request for a given voice, so
# calls into it are serialized.
# NOTE(review): assumes Synthesizer.tts is not safe to call concurrently on a
# single instance — confirm against the installed TTS version.
_SYNTH_LOCK = threading.Lock()


@functools.lru_cache(maxsize=None)
def _load_synthesizer(model_dir: str) -> "Synthesizer":
    """Build the Synthesizer for *model_dir* once and reuse it afterwards."""
    return Synthesizer(f"{model_dir}/model.pth", f"{model_dir}/config.json")


def tts(text: str, voice: str = "Male") -> str:
    """Synthesize *text* with the selected voice and return a WAV file path.

    Args:
        text: Input text. Anything beyond ``MAX_TXT_LEN`` characters is
            dropped before synthesis.
        voice: "Male" selects the male model; any other value selects the
            female model.

    Returns:
        Path of a temporary ``.wav`` file containing the audio. The file is
        created with ``delete=False`` and is not removed by this function.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
    print(text)

    # Replace the Latin capital letter "I" with the Palochka symbol.
    text = text.replace("I", "ӏ")

    model_dir = BASE_DIR.format("male" if voice == "Male" else "female")

    # Reuse the cached model instead of reloading the checkpoint from disk on
    # every request.
    with _SYNTH_LOCK:
        synthesizer = _load_synthesizer(model_dir)
        wavs = synthesizer.tts(text)

    # Write the audio to a temp file that outlives this call.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
    return fp.name
# Input widgets, listed in the same order as the parameters of ``tts``.
_text_input = gr.Textbox(
    label="Text",
    value="Default text here if you need it.",
)
_voice_input = gr.Radio(
    choices=["Male", "Female"],
    value="Male",  # Male is the preselected choice
    label="Voice",
)

# The synthesized audio is handed to the UI as a file path.
_audio_output = gr.Audio(label="Output", type='filepath')

iface = gr.Interface(
    fn=tts,
    inputs=[_text_input, _voice_input],
    outputs=_audio_output,
    title="KBD TTS",
    live=False,
)

iface.launch(share=False)