File size: 12,078 Bytes
707851b
c4d001b
2e5681a
707851b
c4d001b
 
 
 
 
2e5681a
c4d001b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9eb7580
 
 
c4d001b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9cde8f9
 
 
 
 
 
 
c4d001b
 
9cde8f9
c4d001b
9cde8f9
c4d001b
 
 
3cfc715
 
 
 
 
9cde8f9
3cfc715
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2fd4b91
 
 
3cfc715
c4d001b
2fd4b91
c4d001b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ec85a52
c4d001b
 
 
 
 
9059222
c4d001b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9059222
c4d001b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3cfc715
c4d001b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3cfc715
 
 
 
c4d001b
 
 
5759558
 
c4d001b
 
5759558
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c4d001b
 
3cfc715
 
c4d001b
3cfc715
 
 
707851b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
import gradio as gr
import numpy as np
import spaces

from ipa import g2p
from ipa.ipa import text_to_ipa
from models import models_config


@spaces.GPU
def _do_tts(model_id, ipa, language_name, speaker_name=None, speaker_wav=None):
    """Synthesize ``ipa`` with the model registered under ``model_id``.

    A reference recording wins over a named speaker: when ``speaker_wav`` is
    not ``None`` it is forwarded to the model and ``speaker_name`` is not
    passed at all; otherwise ``speaker_name`` (which may be ``None``) is
    forwarded. Sentence splitting is always turned off.

    Returns:
        Whatever ``model.tts`` returns for the request.
    """
    synthesizer = models_config[model_id]["model"]

    # Exactly one of the two speaker-selection keywords is sent to the model.
    if speaker_wav is None:
        voice_kwargs = {"speaker_name": speaker_name}
    else:
        voice_kwargs = {"speaker_wav": speaker_wav}

    return synthesizer.tts(
        ipa,
        language_name=language_name,
        split_sentences=False,
        **voice_kwargs,
    )


def text_to_speech(
    model_id: str,
    use_default_emb_or_custom: str,
    speaker_wav,
    speaker: str,
    language: str,
    dialect: str,
    speed: float,
    text: str,
):
    """Convert ``text`` to IPA and synthesize it with the selected model.

    Args:
        model_id: Key into ``models_config`` selecting the model.
        use_default_emb_or_custom: ``"預設語者"`` to use a speaker from the
            model's speaker mapping; any other value uses ``speaker_wav``.
        speaker_wav: Reference recording used when a custom speaker is chosen.
        speaker: Speaker name; only forwarded when the model's speaker mapping
            has more than one entry.
        language: Language name. Used directly as the G2P tag when it is a key
            of ``g2p``; otherwise the tag is ``f"{language}_{dialect}"``.
        dialect: Dialect name, only used to build the G2P tag (see above).
        speed: Value assigned to the model's ``length_scale`` before synthesis.
        text: Text to synthesize.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``samples`` is the synthesized
        waveform wrapped in a NumPy array.

    Raises:
        gr.Error: If ``text`` is ``None``, empty, or only whitespace.
    """
    # ``not text`` also covers None (which ``len(text)`` would crash on), and
    # whitespace-only input carries nothing to synthesize either.
    if not text or not text.strip():
        raise gr.Error("請勿輸入空字串。")

    tag = language if language in g2p else f"{language}_{dialect}"

    # Commas are ignored for every model whose id does not contain "gt3".
    ignore_comma = "gt3" not in model_id

    ipa = text_to_ipa(text, tag, g2p, ignore_comma)

    model_config = models_config[model_id]
    tts_model = model_config["model"].tts_model
    # NOTE(review): this mutates the shared model object, so concurrent
    # requests with different speeds may affect each other — confirm.
    tts_model.length_scale = speed

    if use_default_emb_or_custom == "預設語者":
        has_multiple_speakers = len(model_config["speaker_mapping"]) > 1
        wav = _do_tts(
            model_id,
            ipa,
            speaker_name=speaker if has_multiple_speakers else None,
            language_name=language,
        )
    else:
        wav = _do_tts(
            model_id,
            ipa,
            speaker_wav=speaker_wav,
            language_name=language,
        )

    return tts_model.config.audio.sample_rate, np.array(wav)


def when_model_selected(model_id):
    """Build the widget updates to apply after a model is chosen.

    Args:
        model_id: Key into ``models_config`` for the newly selected model.

    Returns:
        A 3-tuple of ``gr.update`` payloads, in order:

        1. speaker dropdown — choices from the model's ``speaker_mapping``,
           first entry selected, interactive only with more than one choice;
        2. language radio — choices from the model's ``language_mapping``,
           first entry selected, interactive only with more than one choice;
        3. default/custom speaker radio — reset to ``"預設語者"`` and visible
           only when the model config has a truthy
           ``speaker_encoder_model_path``.
    """
    model_config = models_config[model_id]

    speaker_choices = list(model_config["speaker_mapping"].items())
    language_choices = list(model_config["language_mapping"].items())

    has_speaker_encoder = bool(
        model_config["model"].tts_model.config.model_args.speaker_encoder_model_path
    )

    return (
        gr.update(
            choices=speaker_choices,
            value=speaker_choices[0][1] if speaker_choices else None,
            interactive=len(speaker_choices) > 1,
        ),
        gr.update(
            choices=language_choices,
            # Same empty-mapping guard as the speaker dropdown, so a model
            # without languages yields no selection instead of an IndexError.
            value=language_choices[0][1] if language_choices else None,
            interactive=len(language_choices) > 1,
        ),
        gr.update(visible=has_speaker_encoder, value="預設語者"),
    )


def use_default_emb_or_custom_radio_input(use_default_emb_or_custom):
    """Toggle which speaker input is shown for the chosen speaker type.

    Returns two ``gr.update`` payloads: the first is visible exactly when the
    value is ``"客製化語者"``, the second exactly when it is not. The caller's
    ``outputs`` list decides which widgets they apply to.
    """
    wants_custom_speaker = use_default_emb_or_custom == "客製化語者"
    first_update = gr.update(visible=wants_custom_speaker)
    second_update = gr.update(visible=not wants_custom_speaker)
    return first_update, second_update


def language_radio_changed(language):
    """Build the dialect-radio update for the newly selected language.

    When ``language`` is itself a key of ``g2p`` the radio is hidden and gets
    the single placeholder choice ``("None", "")``. Otherwise the choices are
    the second ``"_"``-separated field of every ``g2p`` key that contains
    ``language``, and the radio is shown. The first choice is selected, and
    the radio is interactive only when there is more than one choice.

    NOTE(review): the match is a substring test and an empty result would
    fail on the first-choice lookup — presumably every language outside
    ``g2p`` has at least one ``language_dialect`` key; confirm.
    """
    needs_dialect = language not in g2p

    if needs_dialect:
        options = []
        for tag in g2p.keys():
            if language in tag:
                options.append((tag.split("_")[1], tag.split("_")[1]))
    else:
        options = [("None", "")]

    selected = options[0][1]
    return gr.update(
        choices=options,
        value=selected,
        interactive=len(options) > 1,
        visible=needs_dialect,
    )


def update_example(language):
    """Refresh the examples dataset for the newly selected language.

    The first entry of the module-level ``examples.dataset.component_props``
    is updated in place: it is hidden with a placeholder choice when
    ``language`` is a key of ``g2p``, otherwise shown with the dialects
    parsed from the ``g2p`` keys that contain ``language``.

    Returns:
        A ``gr.Dataset`` carrying the updated props and the sample rows for
        ``language``, or ``None`` when no samples are defined for it. Each
        row is ``[dialect, text, Chinese translation]``.
    """
    props = examples.dataset.component_props
    dialect_props = props[0]

    standalone_language = language in g2p
    dialect_props["visible"] = not standalone_language
    if standalone_language:
        dialect_props["choices"] = [("None", "")]
    else:
        dialect_props["choices"] = [
            (tag.split("_")[1], tag.split("_")[1])
            for tag in g2p.keys()
            if language in tag
        ]

    samples_by_language = {
        "阿美": [
            [
                "南勢",
                "U payniyaru’ nu pangcah i matiya, u ina haw ku miterungay, mikadavu ku vavainay i vavahiyan a luma.",
                "阿美族的原始社會,是以女人為主的母系社會,男子授室入贅女家。",
            ],
            [
                "恆春",
                "O todong no cecayay a kitakit ko sa’osi to itiya:ay ho a kasaniyaro’.",
                "當時的部落如同一個國家的概念。",
            ],
            [
                "馬蘭",
                "O sata’angayay a pisanga’an to tilong ko Tafalong itiya ho, mapaliwal i kasaniyaroaro’ ko misatilongan to sakacaloway no finawlan i ’orip a lalosidan.",
                "而太巴塱部落則是當時最大的製造陶埸域,供應各部落族人日常生活的陶器用品。",
            ],
            [
                "秀姑巒",
                "ci ngangan ko Pangcah to Awa^, ’Afo^, Oning, Falah sanay a ngangan.",
                "所以阿美族有Awa^(一無所有)、’Afo^(碳灰)、Oning(污垢)、Falah(丟棄)……等這樣的名字。",
            ],
            [
                "海岸",
                "mikayat ko kawili kawanan a kamay to tatihi, masakawanan ko rakat a mitaliyok, lahoday ko piperok, mato’asay, o wawa ato lafang maemin mangaay a masakero.",
                "單純的手牽手,向右移動來繞圓圈,很輕鬆,老少咸宜全下場跳。",
            ],
        ],
        "賽德克": [
            [
                "德固達雅",
                "Netun so laqi tnqliyan de, asi ka mangal ngayan rrudan na seediq tnquli ka ngayan laqi tnqliyan.",
                "若是收養的子女,被收養子女的名字就要承傳收養者家族先人的名字。",
            ],
            [
                "德鹿谷",
                "Mnsuwil mangal hangan samac ni pnegalang uri.",
                "有時也以動植物命名。",
            ],
            [
                "都達",
                "so ana manu hhmaan Sediq u niqan balay snlhayan na.",
                "農耕行為極度神聖化。",
            ],
        ],
        "太魯閣": [
            [
                "",
                "Rudan Truku sexual o kmgaaw ptasan dqras kana, ida qtaan bi bitaq sayang ka rudan ptasan dqras.",
                "過去太魯閣族的耆老都是文面的,直到最近文面老人還能夠看得到。",
            ],
        ],
    }

    rows = samples_by_language.get(language)
    if rows is None:
        # No samples are defined for this language.
        return None
    return gr.Dataset(component_props=props, samples=rows)

def get_title():
    """Return the page title taken from the first line of ``DEMO.md``.

    The first line is read as UTF-8, surrounding whitespace (including the
    trailing newline left by ``readline``) is removed, and then any leading
    or trailing ``#`` and space characters are stripped, so ``"# Title\\n"``
    yields ``"Title"``. An empty file yields ``""``.
    """
    # Explicit encoding: the default is platform-dependent.
    with open("DEMO.md", encoding="utf-8") as tong:
        first_line = tong.readline()
    # Drop the newline first; otherwise it shields trailing "#"/" " from the
    # second strip and ends up inside the title.
    return first_line.strip().strip("# ")

# Page title comes from the first line of DEMO.md.
_page_title = get_title()

# Font candidates handed to the theme; "tauhu-oo" is the family expected from
# the stylesheet imported via ``css`` below.
_theme = gr.themes.Default(
    font=(
        "tauhu-oo",
        gr.themes.GoogleFont("Source Sans Pro"),
        "ui-sans-serif",
        "system-ui",
        "sans-serif",
    )
)

demo = gr.Blocks(
    title=_page_title,
    css="@import url(https://tauhu.tw/tauhu-oo.css);",
    theme=_theme,
)

with demo:
    # The first configured model determines every initial widget state below.
    default_model_id = list(models_config.keys())[0]
    default_model_config = models_config[default_model_id]

    model_drop_down = gr.Dropdown(
        models_config.keys(),
        value=default_model_id,
        label="模型",
    )
    use_default_emb_or_custom_radio = gr.Radio(
        label="語者類型",
        choices=["預設語者", "客製化語者"],
        value="預設語者",
        visible=True,
        show_label=False,
    )
    # Reference recording input; hidden until the custom-speaker option is
    # chosen (toggled by use_default_emb_or_custom_radio_input).
    speaker_wav = gr.Audio(
        label="客製化語音",
        visible=False,
        editable=False,
        type="filepath",
        waveform_options=gr.WaveformOptions(
            show_controls=False,
            sample_rate=16000,
        ),
    )
    speaker_drop_down = gr.Dropdown(
        choices=list(default_model_config["speaker_mapping"].items()),
        value=list(default_model_config["speaker_mapping"].values())[0],
        label="語者",
        interactive=len(default_model_config["speaker_mapping"]) > 1,
        visible=True,
    )
    use_default_emb_or_custom_radio.change(
        use_default_emb_or_custom_radio_input,
        inputs=[use_default_emb_or_custom_radio],
        outputs=[speaker_wav, speaker_drop_down],
    )

    default_language = list(default_model_config["language_mapping"].values())[0]
    language_radio = gr.Radio(
        choices=list(default_model_config["language_mapping"].items()),
        value=default_language,
        label="語言",
        interactive=len(default_model_config["language_mapping"]) > 1,
    )

    # Mirror language_radio_changed(): a language that is itself a g2p key has
    # no dialects, so use the hidden placeholder instead of parsing
    # "language_dialect" tags (which would raise IndexError on such a key).
    if default_language in g2p:
        default_dialect_choices = [("None", "")]
    else:
        default_dialect_choices = [
            (tag.split("_")[1], tag.split("_")[1])
            for tag in g2p.keys()
            if default_language in tag
        ]
    dialect_radio = gr.Radio(
        choices=default_dialect_choices,
        value=default_dialect_choices[0][1],
        label="方言",
        interactive=len(default_dialect_choices) > 1,
        visible=default_language not in g2p,
    )

    model_drop_down.change(
        when_model_selected,
        inputs=[model_drop_down],
        outputs=[speaker_drop_down, language_radio, use_default_emb_or_custom_radio],
    )

    input_text = gr.Textbox(
        label="輸入文字",
        value="",
    )

    speed = gr.Slider(maximum=1.5, minimum=0.5, value=1, label="語速")

    # Explicit encoding: the default is platform-dependent.
    with open("DEMO.md", encoding="utf-8") as tong:
        gr.Markdown(tong.read())
    # The inputs are listed in the positional order text_to_speech expects.
    gr.Interface(
        text_to_speech,
        inputs=[
            model_drop_down,
            use_default_emb_or_custom_radio,
            speaker_wav,
            speaker_drop_down,
            language_radio,
            dialect_radio,
            speed,
            input_text,
        ],
        outputs=[
            gr.Audio(interactive=False, label="合成語音", show_download_button=True),
        ],
        allow_flagging="auto",
    )

    # Hidden textbox that receives the Chinese column of an example row.
    dummy_chinese_text = gr.Textbox(visible=False, label="中文")

    # NOTE(review): the initial rows are the same as update_example()'s "阿美"
    # samples — presumably the default language is 阿美; confirm.
    examples = gr.Examples(
        [
            [
                "南勢",
                "U payniyaru’ nu pangcah i matiya, u ina haw ku miterungay, mikadavu ku vavainay i vavahiyan a luma.",
                "阿美族的原始社會,是以女人為主的母系社會,男子授室入贅女家。",
            ],
            [
                "恆春",
                "O todong no cecayay a kitakit ko sa’osi to itiya:ay ho a kasaniyaro’.",
                "當時的部落如同一個國家的概念。",
            ],
            [
                "馬蘭",
                "O sata’angayay a pisanga’an to tilong ko Tafalong itiya ho, mapaliwal i kasaniyaroaro’ ko misatilongan to sakacaloway no finawlan i ’orip a lalosidan.",
                "而太巴塱部落則是當時最大的製造陶埸域,供應各部落族人日常生活的陶器用品。",
            ],
            [
                "秀姑巒",
                "ci ngangan ko Pangcah to Awa^, ’Afo^, Oning, Falah sanay a ngangan.",
                "所以阿美族有Awa^(一無所有)、’Afo^(碳灰)、Oning(污垢)、Falah(丟棄)……等這樣的名字。",
            ],
            [
                "海岸",
                "mikayat ko kawili kawanan a kamay to tatihi, masakawanan ko rakat a mitaliyok, lahoday ko piperok, mato’asay, o wawa ato lafang maemin mangaay a masakero.",
                "單純的手牽手,向右移動來繞圓圈,很輕鬆,老少咸宜全下場跳。",
            ],
        ],
        label="範例",
        inputs=[dialect_radio, input_text, dummy_chinese_text],
        cache_examples=False,
    )
    # On a language change, update the dialect radio first, then the examples.
    language_radio.change(
        language_radio_changed, inputs=[language_radio], outputs=[dialect_radio]
    ).then(update_example, inputs=[language_radio], outputs=[examples.dataset])

demo.launch()