"""Gradio demo for Coqui TTS: pick one of the released TTS models and synthesize text."""

import tempfile
from typing import Optional

import gradio as gr
import numpy as np
from TTS.config import load_config
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer

# Module-level registries; nothing in the visible code populates them.
MODELS = {}
SPEAKERS = {}

manager = ModelManager()
MODEL_NAMES = manager.list_tts_models()

# filter out multi-speaker models and slow wavegrad vocoders
filters = ["vctk", "your_tts", "ek1"]
MODEL_NAMES = [model_name for model_name in MODEL_NAMES if not any(f in model_name for f in filters)]

# reorder models: rotate the first three entries so the original second one
# is listed first. Guarded so a short model list does not raise IndexError
# at import time.
if len(MODEL_NAMES) >= 3:
    MODEL_NAMES[0], MODEL_NAMES[1], MODEL_NAMES[2] = MODEL_NAMES[1], MODEL_NAMES[2], MODEL_NAMES[0]
print(MODEL_NAMES)


def tts(text: str, model_name: str, speaker_idx: Optional[str] = None) -> str:
    """Synthesize ``text`` with the selected model and return the WAV file path.

    Args:
        text: Text to synthesize.
        model_name: Model identifier without the ``tts_models/`` prefix
            (the entries of ``MODEL_NAMES`` are offered as choices in the UI).
        speaker_idx: Forwarded as the second positional argument of
            ``Synthesizer.tts``; defaults to ``None``.

    Returns:
        Path of a temporary ``.wav`` file holding the synthesized audio. The
        file is created with ``delete=False`` and is not removed here, since
        the caller needs it to exist after this function returns.

    Raises:
        ValueError: If ``model_name`` is empty or ``None``.
    """
    print(text, model_name)

    # Fail early with a clear message instead of asking the manager to
    # download "tts_models/None" when nothing was selected in the UI.
    if not model_name:
        raise ValueError("model not found")

    # download model
    model_path, config_path, model_item = manager.download_model(f"tts_models/{model_name}")
    vocoder_name: Optional[str] = model_item["default_vocoder"]

    # download vocoder (only when the model declares a default one)
    vocoder_path = None
    vocoder_config_path = None
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

    # init synthesizer
    # NOTE(review): a new Synthesizer is built on every request; the unused
    # MODELS dict looks intended as a cache -- confirm memory budget before
    # enabling one.
    synthesizer = Synthesizer(
        model_path,
        config_path,
        None,
        None,
        vocoder_path,
        vocoder_config_path,
    )

    # synthesize
    wavs = synthesizer.tts(text, speaker_idx)

    # return output
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
        return fp.name


article = """
Visit us on Coqui.ai and drop a 🌟 to 🔗CoquiTTS.
Run CoquiTTS locally for the best result. Check out our 🔗documentation.

```bash
$ pip install TTS
...
$ tts --list_models
...
$ tts --text "Text for TTS" --model_name "<model_type>/<language>/<dataset>/<model_name>" --out_path folder/to/save/output.wav
```

👑 Model contributors

- @nmstoker
- @kaiidams
- @WeberJulian,
- @Edresson
- @thorstenMueller
- @r-dh
- @kirianguiller
- @robinhad
- @fkarabiber
- @nicolalandro

👉 Drop a ✨PR✨ on 🐸TTS to share a new model and have it included here.
"""

iface = gr.Interface(
    fn=tts,
    inputs=[
        gr.inputs.Textbox(
            label="Input Text",
            default="This sentence has been generated by a speech synthesis system.",
        ),
        gr.inputs.Radio(
            label="Pick a TTS Model - (language/dataset/model_name)",
            choices=MODEL_NAMES,
        ),
        # gr.inputs.Dropdown(label="Select a speaker", choices=SPEAKERS, default=None)
        # gr.inputs.Audio(source="microphone", label="Record your voice.", type="numpy", label=None, optional=False)
    ],
    outputs=gr.outputs.Audio(label="Output"),
    title="🐸💬 CoquiTTS Demo",
    theme="grass",
    description="🐸💬 Coqui TTS - a deep learning toolkit for Text-to-Speech, battle-tested in research and production.",
    article=article,
    allow_flagging=False,
    flagging_options=['error', 'bad-quality', 'wrong-pronounciation'],
    layout="vertical",
    live=False,
)
iface.launch(share=False)