File size: 4,210 Bytes
3ee94dd
 
 
 
 
 
 
 
 
 
 
e80739d
3ee94dd
 
 
 
 
1b7f8cd
 
3ee94dd
c68294e
7431872
 
 
 
 
c68294e
3ee94dd
 
 
 
f007a1f
 
 
3ee94dd
 
57bdc7e
3ee94dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3468586
3ee94dd
d019a70
3ee94dd
3468586
3ee94dd
 
 
 
 
 
 
 
7d0f455
d019a70
3ee94dd
68baa0d
3ee94dd
d019a70
 
 
 
 
 
 
 
b1929e6
8821299
10b1542
3ee94dd
d019a70
3ee94dd
 
 
 
 
 
8b8606a
1b7f8cd
3ee94dd
 
8b8606a
3ee94dd
 
 
 
 
 
 
 
 
 
 
 
8b8606a
3ee94dd
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import tempfile
from typing import Optional
from TTS.config import load_config
import gradio as gr
import numpy as np
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer


# Upper bound on the number of input characters; `tts` truncates longer text.
MAX_TXT_LEN: int = 100

# Module-level registries. Nothing in the visible part of this file fills
# them; SPEAKERS is only mentioned by a commented-out Dropdown input.
MODELS: dict = {}
SPEAKERS: dict = {}


# Shared model-registry client: lists the available models here and downloads
# them on demand inside `tts`.
manager = ModelManager()
MODEL_NAMES = manager.list_tts_models()

# Drop every model whose name contains one of these substrings.
# NOTE(review): the original comment describes these as multi-speaker models
# and slow wavegrad vocoders - confirm against the model registry.
filters = ["vctk", "your_tts", "ek1"]
MODEL_NAMES = [model_name for model_name in MODEL_NAMES if not any(f in model_name for f in filters)]

# Reorder models: English entries first, everything else afterwards.
EN = [el for el in MODEL_NAMES if "/en/" in el]
OTHER = [el for el in MODEL_NAMES if "/en/" not in el]
# Promote the sixth English model to the top of the list. The swap is guarded
# so that a registry with fewer than six English models keeps its order
# instead of raising IndexError while the module is being imported.
if len(EN) > 5:
    EN[0], EN[5] = EN[5], EN[0]
MODEL_NAMES = EN + OTHER

print(MODEL_NAMES)


def tts(text: str, model_name: str, speaker_idx: Optional[str] = None):
    """Synthesize ``text`` with the chosen model and return a WAV file path.

    Args:
        text: Text to speak. Characters beyond ``MAX_TXT_LEN`` are discarded
            and a notice is printed.
        model_name: Model identifier passed to ``manager.download_model``.
        speaker_idx: Optional speaker selector forwarded unchanged to
            ``Synthesizer.tts``.

    Returns:
        The name of a temporary ``.wav`` file containing the synthesized
        audio. The file is created with ``delete=False``, so this function
        does not remove it.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
    print(text, model_name)
    # download model
    model_path, config_path, model_item = manager.download_model(model_name)
    # A registry entry without a "default_vocoder" key is handled the same way
    # as one that sets it to None: synthesize without a separate vocoder.
    vocoder_name: Optional[str] = model_item.get("default_vocoder")
    # download vocoder
    vocoder_path = None
    vocoder_config_path = None
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
    # init synthesizer
    # NOTE(review): the two positional None arguments are presumably the
    # speaker/language files - confirm against the Synthesizer signature.
    synthesizer = Synthesizer(
        model_path, config_path, None, None, vocoder_path, vocoder_config_path,
    )
    # synthesize (a constructor call never yields None, so no None check is needed)
    wavs = synthesizer.tts(text, speaker_idx)
    # return output
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
        return fp.name



# Markdown/HTML footer text handed to `gr.Interface` through its `article` argument.
article = """
Visit us on Coqui.ai and drop a 🌟 to 🔗<a href="https://github.com/coqui-ai/TTS" target="_blank">CoquiTTS</a>.

<br/>

Run CoquiTTS locally for the best result. Check out our 🔗<a href="https://tts.readthedocs.io/en/latest/inference.html">documentation</a>.

```bash
$ pip install TTS
...
$ tts --list_models
...
$ tts --text "Text for TTS" --model_name "<type>/<language>/<dataset>/<model_name>" --out_path folder/to/save/output.wav
```
<img src="https://static.scarf.sh/a.png?x-pxid=1404a024-e647-4406-bb9a-4ade0c931182" />
<br/>

👑 <b> Model contributors</b>

- <a href="https://github.com/nmstoker/" target="_blank">@nmstoker</a>
- <a href="https://github.com/kaiidams/" target="_blank">@kaiidams</a>
- <a href="https://github.com/WeberJulian/" target="_blank">@WeberJulian</a>
- <a href="https://github.com/Edresson/" target="_blank">@Edresson</a>
- <a href="https://github.com/thorstenMueller/" target="_blank">@thorstenMueller</a>
- <a href="https://github.com/r-dh/" target="_blank">@r-dh</a>
- <a href="https://github.com/kirianguiller/" target="_blank">@kirianguiller</a>
- <a href="https://github.com/robinhad/" target="_blank">@robinhad</a>
- <a href="https://github.com/fkarabiber/" target="_blank">@fkarabiber</a>
- <a href="https://github.com/nicolalandro/" target="_blank">@nicolalandro</a>
- <a href="https://github.com/a-froghyar" target="_blank">@a-froghyar</a>

👉 Drop a ✨PR✨ on 🐸TTS to share a new model and have it included here.
"""

# Build the demo UI: a text box and a model picker feed `tts`, and the file
# path it returns is rendered by the audio output component.
iface = gr.Interface(
    fn=tts,
    inputs=[
        gr.inputs.Textbox(
            label="Input Text",
            default="This sentence has been generated by a speech synthesis system.",
        ),
        gr.inputs.Radio(
            label="Pick a TTS Model - (language/dataset/model_name)",
            choices=MODEL_NAMES,
        ),
        # gr.inputs.Dropdown(label="Select a speaker", choices=SPEAKERS, default=None)
        # gr.inputs.Audio(source="microphone", label="Record your voice.", type="numpy", label=None, optional=False)
    ],
    outputs=gr.outputs.Audio(label="Output"),
    # The emoji in title/description were mis-encoded; restored to the intended characters.
    title="🐸💬 CoquiTTS Demo",
    theme="grass",
    description="🐸💬  Coqui TTS - a deep learning toolkit for Text-to-Speech, battle-tested in research and production.",
    article=article,
    allow_flagging=False,
    flagging_options=['error', 'bad-quality', 'wrong-pronounciation'],
    layout="vertical",
    live=False
)
# Start the web server without requesting a public share link.
iface.launch(share=False)