File size: 2,585 Bytes
c6f03d2
 
 
 
 
 
 
 
fe5a4b7
c6f03d2
 
 
 
a74109d
c6f03d2
 
7f590c6
78cc010
c6f03d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fe5a4b7
 
 
39a66fd
 
 
 
fe5a4b7
c6f03d2
 
 
fe5a4b7
c6f03d2
 
fe5a4b7
c6f03d2
 
 
 
 
 
 
 
 
 
 
 
fe5a4b7
c6f03d2
 
 
 
 
 
fe5a4b7
 
 
 
 
 
 
 
 
c6f03d2
 
78cc010
c6f03d2
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import gradio as gr
import tempfile

from huggingface_hub import hf_hub_download
from torch import no_grad, package
import ctypes
import gc

from accentor import accentification, stress_replace_and_shift, accentors


# Voice name shown in the UI -> Hugging Face Hub repository holding that voice's model.
config = {
    "mykyta": "theodotus/tts-vits-mykyta-uk",
    "mykyta-lite": "theodotus/tts-vits-mykyta-low-uk",
    "olena": "theodotus/tts-vits-olena-uk",
    "lada": "theodotus/tts-vits-lada-uk",
    "dmytro": "theodotus/tts-vits-dmytro-uk",
    "harakternyk": "theodotus/tts-vits-harakternyk-uk",
}

# Voice names in declaration order (iterating a dict yields its keys).
voices = list(config)

# Keyword arguments forwarded unchanged to every synthesizer `tts` call.
tts_kwargs = dict(speaker_name="uk", language_name="uk")


def trim_memory():
    """Release unused memory back to the operating system (best effort).

    Runs the Python garbage collector and then asks the C allocator to hand
    freed heap memory back to the OS through glibc's ``malloc_trim(0)``.

    On platforms where ``libc.so.6`` cannot be loaded, or where the loaded
    libc does not export ``malloc_trim``, only the garbage collection step is
    performed; no exception is raised.
    """
    # Collect first so that memory freed by the collector can be trimmed too.
    gc.collect()
    try:
        libc = ctypes.CDLL("libc.so.6")
        libc.malloc_trim(0)
    except (OSError, AttributeError):
        # OSError: library not found (non-glibc platform).
        # AttributeError: symbol missing from the loaded libc.
        # Trimming is purely an optimisation, so skipping it is safe.
        pass


def init_models():
    """Download and load the synthesizer for every voice listed in ``config``.

    For each ``voice -> repository`` entry, ``model.pt`` is fetched with
    ``hf_hub_download`` and the ``model`` pickle of the ``tts_models`` package
    is loaded from it through ``torch.package.PackageImporter``.

    Returns:
        dict mapping each voice name to its loaded synthesizer object.
    """

    def _load(repo_id):
        # Fetch the packaged checkpoint, then unpickle the synthesizer from it.
        checkpoint_path = hf_hub_download(repo_id, "model.pt")
        return package.PackageImporter(checkpoint_path).load_pickle(
            "tts_models", "model"
        )

    return {voice: _load(repo_id) for voice, repo_id in config.items()}


def tts(text: str, voice: str, mode: str):
    """Synthesize speech for ``text`` with the selected voice.

    Args:
        text: Input text to be spoken.
        voice: Key into the module-level ``models`` dict selecting the
            synthesizer.
        mode: Accentor mode passed to ``accentification``. When it equals
            ``"none"`` the accented text is sent to the synthesizer as is;
            otherwise it is first passed through ``stress_replace_and_shift``.

    Returns:
        A ``(wav_path, accented_text)`` tuple: the path of a temporary
        ``.wav`` file holding the audio, and the text as returned by
        ``accentification``.

    Raises:
        KeyError: If ``voice`` is not a key of ``models``.
    """
    # accentor
    accented_text = accentification(text, mode)
    if mode == "none":
        plussed_text = accented_text
    else:
        plussed_text = stress_replace_and_shift(accented_text)

    # TTS
    synt = models[voice]
    try:
        with no_grad():
            # Synthesize before creating the output file, so a failed
            # synthesis does not leave an empty orphaned temp file behind.
            wav_data = synt.tts(plussed_text, **tts_kwargs)
            # delete=False: the file must outlive this function because the
            # caller receives only its path.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
                synt.save_wav(wav_data, fp)
    finally:
        # Trim memory after every request, whether it succeeded or not.
        trim_memory()
    return fp.name, accented_text



# Load every voice once at startup; tts() looks synthesizers up in this dict.
models = init_models()


# Input widgets, in the positional order of tts(text, voice, mode).
text_input = gr.Textbox(
    label="Input",
    value="Кам'янець-Подільський - місто в Хмельницькій області України, центр Кам'янець-Подільської міської об'єднаної територіальної громади і Кам'янець-Подільського району.",
)
voice_input = gr.Radio(label="Voice", choices=voices, value=voices[0])
accentor_input = gr.Radio(label="Accentor", choices=accentors, value=accentors[0])

# Output widgets, in the order of the tuple returned by tts().
audio_output = gr.Audio(label="Output")
stressed_output = gr.Textbox(label="Stressed")

iface = gr.Interface(
    fn=tts,
    inputs=[text_input, voice_input, accentor_input],
    outputs=[audio_output, stressed_output],
    title="🇺🇦 - Ukrainian Voices",
    article="[Harakternyk](https://huggingface.co/theodotus/tts-vits-harakternyk-uk) model is licensed under [CC BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/)",
)

iface.launch()