|
import tempfile |
|
from typing import Optional |
|
|
|
import gradio as gr |
|
import numpy as np |
|
|
|
from TTS.utils.manage import ModelManager |
|
from TTS.utils.synthesizer import Synthesizer |
|
|
|
# Coqui TTS models offered in the UI. Names are relative to the
# "tts_models/" prefix that is prepended when each model is downloaded.
MODEL_NAMES = [
    "en/ljspeech/tacotron2-DDC",
    "fr/mai/tacotron2-DDC",
    "zh-CN/baker/tacotron2-DDC-GST",
    "nl/mai/tacotron2-DDC",
    "de/thorsten/tacotron2-DCA",
]

# Maps each entry of MODEL_NAMES to the Synthesizer built for it below.
MODELS = {}

manager = ModelManager()

# Every model is downloaded and wrapped in a Synthesizer at import time, so
# MODELS is fully populated before any request is served.
for MODEL_NAME in MODEL_NAMES:
    print(f"downloading {MODEL_NAME}")
    model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")

    # A missing "default_vocoder" entry is handled like an explicit null:
    # no vocoder is downloaded and both vocoder paths stay None.
    # NOTE(review): assumes model_item is a dict-like catalogue entry.
    vocoder_name: Optional[str] = model_item.get("default_vocoder")
    vocoder_path = None
    vocoder_config_path = None
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

    # Keyword arguments keep every path bound to the intended parameter even
    # if the constructor's positional order differs between TTS releases.
    synthesizer = Synthesizer(
        tts_checkpoint=model_path,
        tts_config_path=config_path,
        tts_speakers_file=None,
        vocoder_checkpoint=vocoder_path,
        vocoder_config=vocoder_config_path,
    )
    MODELS[MODEL_NAME] = synthesizer
|
|
|
|
|
def tts(text: str, model_name: str):
    """Synthesize *text* with the chosen model and return a WAV file path.

    Args:
        text: The sentence(s) to speak.
        model_name: Key into ``MODELS`` selecting the synthesizer to use.

    Returns:
        The name of a temporary ``.wav`` file holding the synthesized audio.
        The file is created with ``delete=False``, so it is not removed
        when it is closed.

    Raises:
        NameError: If ``model_name`` has no usable entry in ``MODELS``.
    """
    print(text, model_name)

    engine = MODELS.get(model_name)
    if engine is None:
        raise NameError("model not found")

    # Synthesize first so that no temporary file is created when this fails.
    waveform = engine.tts(text)

    wav_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    with wav_file:
        engine.save_wav(waveform, wav_file)
    return wav_file.name
|
|
|
|
|
|
|
# Gradio front end: a text box and a model picker are passed to `tts`, and
# the value it returns is handed to the audio output component.
iface = gr.Interface(
    fn=tts,
    inputs=[
        gr.inputs.Textbox(
            label="Input",
            default="Hello, how are you?",
        ),
        gr.inputs.Radio(
            label="Pick a TTS Model",
            choices=MODEL_NAMES,
        ),
    ],
    outputs=gr.outputs.Audio(label="Output"),
    # The emoji were stored as mis-decoded UTF-8 bytes; restored here.
    title="🐸💬 - Coqui TTS",
    theme="huggingface",
    description="🐸💬 - a deep learning toolkit for Text-to-Speech, battle-tested in research and production",
    article="more info at https://github.com/coqui-ai/TTS",
)

# Starts the web server as a module-level side effect.
iface.launch()
|
|