Spaces:
Runtime error
Runtime error
File size: 7,100 Bytes
3ee94dd e80739d 3ee94dd 1b7f8cd 3ee94dd c68294e 7431872 c68294e 3ee94dd a3635dc f007a1f 3ee94dd 57bdc7e 3ee94dd a3635dc 3ee94dd a3635dc 0ed9306 a3635dc bdaa39a a3635dc bdaa39a a3635dc 0ed9306 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
import tempfile
from typing import Optional
from TTS.config import load_config
import gradio as gr
import numpy as np
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
# Module-level registries; nothing in the visible code reads or writes them.
MODELS = {}
SPEAKERS = {}
# Upper bound on the number of input characters handed to the synthesizer.
MAX_TXT_LEN = 100

manager = ModelManager()
MODEL_NAMES = manager.list_tts_models()

# filter out multi-speaker models and slow wavegrad vocoders
filters = ["vctk", "your_tts", "ek1"]
MODEL_NAMES = [model_name for model_name in MODEL_NAMES if not any(f in model_name for f in filters)]

# reorder models: English models first, then every other language
EN = [el for el in MODEL_NAMES if "/en/" in el]
OTHER = [el for el in MODEL_NAMES if "/en/" not in el]
# Move the sixth English model to the front of the list. The index is
# hard-coded, so only swap when the list is long enough; otherwise a shorter
# catalogue would raise IndexError while the module is being imported.
if len(EN) > 5:
    EN[0], EN[5] = EN[5], EN[0]
MODEL_NAMES = EN + OTHER
print(MODEL_NAMES)
def tts(text: str, model_name: str) -> str:
    """Synthesize ``text`` with the model ``model_name`` and return a wav path.

    The text is truncated to ``MAX_TXT_LEN`` characters. The model, and the
    vocoder named by the model entry's ``default_vocoder`` field when there is
    one, are fetched through the module-level ``manager``. A new
    ``Synthesizer`` is built on every call.

    Args:
        text: Text to speak.
        model_name: Model identifier passed to ``manager.download_model``.

    Returns:
        Path of a temporary ``.wav`` file holding the audio. The file is
        created with ``delete=False``, so it is not removed automatically.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
    print(text, model_name)

    # download model
    model_path, config_path, model_item = manager.download_model(model_name)
    # ``.get`` so an entry without the key is treated like one whose value is
    # None (no separate vocoder) instead of raising KeyError.
    vocoder_name: Optional[str] = model_item.get("default_vocoder")

    # download vocoder
    vocoder_path = None
    vocoder_config_path = None
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

    # init synthesizer
    # A constructor call either returns an instance or raises, so no
    # ``is None`` check is needed afterwards.
    synthesizer = Synthesizer(
        model_path, config_path, None, None, vocoder_path, vocoder_config_path,
    )

    # synthesize
    wavs = synthesizer.tts(text, None)

    # return output
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
        return fp.name
# Page heading markup; not referenced by the layout below.
title = """<h1 align="center">🐸💬 CoquiTTS Playground </h1>"""

# Build the page: a header row (logo + news on the left, project links on the
# right), a collapsible contributor list, and the text/model -> audio form.
# The Markdown literals are kept flush-left so their content stays exactly as
# it was.
with gr.Blocks(analytics_enabled=False) as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown(
                """
## <img src="https://raw.githubusercontent.com/coqui-ai/TTS/main/images/coqui-log-green-TTS.png" height="56"/>
"""
            )
            gr.Markdown(
                """
<br />
## 🐸Coqui.ai News
- 📣 ⓍTTS, our production TTS model that can speak 13 languages, is released [Blog Post](https://coqui.ai/blog/tts/open_xtts), [Demo](https://huggingface.co/spaces/coqui/xtts), [Docs](https://tts.readthedocs.io/en/dev/models/xtts.html)
- 📣 [🐶Bark](https://github.com/suno-ai/bark) is now available for inference with unconstrained voice cloning. [Docs](https://tts.readthedocs.io/en/dev/models/bark.html)
- 📣 You can use [~1100 Fairseq models](https://github.com/facebookresearch/fairseq/tree/main/examples/mms) with 🐸TTS.
- 📣 🐸TTS now supports 🐢Tortoise with faster inference. [Docs](https://tts.readthedocs.io/en/dev/models/tortoise.html)
- 📣 **Coqui Studio API** is landed on 🐸TTS. - [Example](https://github.com/coqui-ai/TTS/blob/dev/README.md#-python-api)
- 📣 [**Coqui Studio API**](https://docs.coqui.ai/docs) is live.
- 📣 Voice generation with prompts - **Prompt to Voice** - is live on [**Coqui Studio**](https://app.coqui.ai/auth/signin)!! - [Blog Post](https://coqui.ai/blog/tts/prompt-to-voice)
- 📣 Voice generation with fusion - **Voice fusion** - is live on [**Coqui Studio**](https://app.coqui.ai/auth/signin).
- 📣 Voice cloning is live on [**Coqui Studio**](https://app.coqui.ai/auth/signin).
<br>
"""
            )
        with gr.Column():
            gr.Markdown(
                """
<br/>
💻 This space showcases some of the **[CoquiTTS](https://github.com/coqui-ai/TTS)** models.
<br/>
There are > 30 languages with single and multi speaker models, all thanks to our 👑 Contributors.
<br/>
Visit the links below for more.
| | |
| ------------------------------- | --------------------------------------- |
| 🐸💬 **CoquiTTS** | [Github](https://github.com/coqui-ai/TTS) |
| 💼 **Documentation** | [ReadTheDocs](https://tts.readthedocs.io/en/latest/)
| 👩💻 **Questions** | [GitHub Discussions] |
| 🗯 **Community** | [![Dicord](https://img.shields.io/discord/1037326658807533628?color=%239B59B6&label=chat%20on%20discord)](https://discord.gg/5eXr5seRrv) |
[github issue tracker]: https://github.com/coqui-ai/tts/issues
[github discussions]: https://github.com/coqui-ai/TTS/discussions
[discord]: https://discord.gg/5eXr5seRrv
"""
            )

    with gr.Row():
        gr.Markdown(
            """
<details>
<summary>👑 Model contributors</summary>
- <a href="https://github.com/nmstoker/" target="_blank">@nmstoker</a>
- <a href="https://github.com/kaiidams/" target="_blank">@kaiidams</a>
- <a href="https://github.com/WeberJulian/" target="_blank">@WeberJulian,</a>
- <a href="https://github.com/Edresson/" target="_blank">@Edresson</a>
- <a href="https://github.com/thorstenMueller/" target="_blank">@thorstenMueller</a>
- <a href="https://github.com/r-dh/" target="_blank">@r-dh</a>
- <a href="https://github.com/kirianguiller/" target="_blank">@kirianguiller</a>
- <a href="https://github.com/robinhad/" target="_blank">@robinhad</a>
- <a href="https://github.com/fkarabiber/" target="_blank">@fkarabiber</a>
- <a href="https://github.com/nicolalandro/" target="_blank">@nicolalandro</a>
- <a href="https://github.com/a-froghyar" target="_blank">@a-froghyar</a>
- <a href="https://github.com/manmay-nakhashi" target="_blank">@manmay-nakhashi</a>
- <a href="https://github.com/noml4u" target="_blank">@noml4u</a>
</details>
<br/>
"""
        )

    with gr.Row():
        with gr.Column():
            # Top-level components with ``value=`` replace the deprecated
            # ``gr.inputs.*`` / ``gr.outputs.*`` classes and their ``default=``
            # keyword.
            input_text = gr.Textbox(
                label="Input Text",
                value="This sentence has been generated by a speech synthesis system.",
            )
            model_select = gr.Dropdown(
                label="Pick Model: tts_models/<language>/<dataset>/<model_name>",
                choices=MODEL_NAMES,
                value="tts_models/en/jenny/jenny",
            )
            tts_button = gr.Button("Send", elem_id="send-btn", visible=True)

        with gr.Column():
            output_audio = gr.Audio(label="Output", type="filepath")

    # Clicking "Send" runs ``tts(text, model_name)`` and plays the returned file.
    tts_button.click(
        tts,
        inputs=[
            input_text,
            model_select,
        ],
        outputs=[output_audio],
    )

# NOTE(review): ``concurrency_count`` is believed to be the Gradio 3.x keyword
# for ``queue``; confirm it against the Gradio version this app is pinned to.
demo.queue(concurrency_count=16).launch(debug=True)