"""Gradio demo application for multi-language / multi-dialect text-to-speech.

The module builds the UI (model, speaker, language and dialect selectors, a
text box and a speed slider), converts the input text to IPA with
``text_to_ipa`` and synthesizes audio with the selected entry of
``models_config``.
"""

import gradio as gr
import numpy as np
import spaces

from ipa import g2p
from ipa.ipa import text_to_ipa
from models import models_config

# Values of the "speaker type" radio. They are both displayed in the UI and
# compared against in the callbacks, so they must stay in sync.
DEFAULT_SPEAKER_MODE = "預設語者"
CUSTOM_SPEAKER_MODE = "客製化語者"

# Markdown file whose first line provides the page title and whose full
# content is rendered inside the demo.
DEMO_MARKDOWN_PATH = "DEMO.md"

# Language whose samples populate the examples table when the page is built.
INITIAL_EXAMPLE_LANGUAGE = "阿美"

# Example rows per language; each row is (dialect, text, Chinese translation).
EXAMPLE_SAMPLES = {
    "阿美": (
        (
            "南勢",
            "U payniyaru’ nu pangcah i matiya, u ina haw ku miterungay, mikadavu ku vavainay i vavahiyan a luma.",
            "阿美族的原始社會,是以女人為主的母系社會,男子授室入贅女家。",
        ),
        (
            "恆春",
            "O todong no cecayay a kitakit ko sa’osi to itiya:ay ho a kasaniyaro’.",
            "當時的部落如同一個國家的概念。",
        ),
        (
            "馬蘭",
            "O sata’angayay a pisanga’an to tilong ko Tafalong itiya ho, mapaliwal i kasaniyaroaro’ ko misatilongan to sakacaloway no finawlan i ’orip a lalosidan.",
            "而太巴塱部落則是當時最大的製造陶埸域,供應各部落族人日常生活的陶器用品。",
        ),
        (
            "秀姑巒",
            "ci ngangan ko Pangcah to Awa^, ’Afo^, Oning, Falah sanay a ngangan.",
            "所以阿美族有Awa^(一無所有)、’Afo^(碳灰)、Oning(污垢)、Falah(丟棄)……等這樣的名字。",
        ),
        (
            "海岸",
            "mikayat ko kawili kawanan a kamay to tatihi, masakawanan ko rakat a mitaliyok, lahoday ko piperok, mato’asay, o wawa ato lafang maemin mangaay a masakero.",
            "單純的手牽手,向右移動來繞圓圈,很輕鬆,老少咸宜全下場跳。",
        ),
    ),
    "賽德克": (
        (
            "德固達雅",
            "Netun so laqi tnqliyan de, asi ka mangal ngayan rrudan na seediq tnquli ka ngayan laqi tnqliyan.",
            "若是收養的子女,被收養子女的名字就要承傳收養者家族先人的名字。",
        ),
        (
            "德鹿谷",
            "Mnsuwil mangal hangan samac ni pnegalang uri.",
            "有時也以動植物命名。",
        ),
        (
            "都達",
            "so ana manu hhmaan Sediq u niqan balay snlhayan na.",
            "農耕行為極度神聖化。",
        ),
    ),
    "太魯閣": (
        (
            "",
            "Rudan Truku sexual o kmgaaw ptasan dqras kana, ida qtaan bi bitaq sayang ka rudan ptasan dqras.",
            "過去太魯閣族的耆老都是文面的,直到最近文面老人還能夠看得到。",
        ),
    ),
}


def _dialect_choices(language):
    """Return the ``(label, value)`` dialect choices available for *language*.

    A language that is itself a key of ``g2p`` has no dialects, so a single
    placeholder choice with an empty value is returned. Otherwise the dialect
    is the part after the first underscore of every ``g2p`` key that contains
    *language*.
    """
    if language in g2p:
        return [("None", "")]
    dialects = (tag.split("_")[1] for tag in g2p if language in tag)
    return [(dialect, dialect) for dialect in dialects]


def _example_samples(language):
    """Return fresh list copies of the example rows stored for *language*."""
    return [list(row) for row in EXAMPLE_SAMPLES[language]]


@spaces.GPU
def _do_tts(model_id, ipa, language_name, speaker_name=None, speaker_wav=None):
    """Synthesize *ipa* with the model registered under *model_id*.

    A reference recording (*speaker_wav*) takes precedence over a named
    speaker (*speaker_name*) when both are supplied.

    Returns whatever ``model.tts`` returns.
    """
    model = models_config[model_id]["model"]
    if speaker_wav is not None:
        speaker_kwargs = {"speaker_wav": speaker_wav}
    else:
        speaker_kwargs = {"speaker_name": speaker_name}
    return model.tts(
        ipa,
        language_name=language_name,
        split_sentences=False,
        **speaker_kwargs,
    )


def text_to_speech(
    model_id: str,
    use_default_emb_or_custom: str,
    speaker_wav,
    speaker: str,
    language: str,
    dialect: str,
    speed: float,
    text: str,
):
    """Convert *text* to speech and return ``(sample_rate, waveform)``.

    Args:
        model_id: Key into ``models_config``.
        use_default_emb_or_custom: Value of the speaker-type radio; the
            built-in speaker is used when it equals ``DEFAULT_SPEAKER_MODE``,
            otherwise *speaker_wav* is used as the reference.
        speaker_wav: Reference recording passed through to the model.
        speaker: Name of the built-in speaker.
        language: Selected language.
        dialect: Selected dialect; only used when *language* is not a key of
            ``g2p``.
        speed: Assigned to the model's ``length_scale``.
        text: Text to synthesize.

    Raises:
        gr.Error: If *text* is empty.
    """
    if not text:
        raise gr.Error("請勿輸入空字串。")

    # Languages without dialects are keyed by name alone in ``g2p``.
    tag = language if language in g2p else f"{language}_{dialect}"
    ignore_comma = "gt3" not in model_id
    ipa = text_to_ipa(text, tag, g2p, ignore_comma)

    model_config = models_config[model_id]
    tts_model = model_config["model"].tts_model
    # NOTE: this mutates the shared model object, not a per-request copy.
    tts_model.length_scale = speed

    if use_default_emb_or_custom == DEFAULT_SPEAKER_MODE:
        # No speaker name is passed when the model has at most one speaker.
        has_several_speakers = len(model_config["speaker_mapping"]) > 1
        wav = _do_tts(
            model_id,
            ipa,
            speaker_name=speaker if has_several_speakers else None,
            language_name=language,
        )
    else:
        wav = _do_tts(
            model_id,
            ipa,
            speaker_wav=speaker_wav,
            language_name=language,
        )

    return tts_model.config.audio.sample_rate, np.array(wav)


def when_model_selected(model_id):
    """Return updates for the speaker, language and speaker-type widgets.

    The speaker-type radio is only shown when the model's config has a truthy
    ``speaker_encoder_model_path``; it is always reset to the default mode.
    """
    model_config = models_config[model_id]
    speaker_choices = list(model_config["speaker_mapping"].items())
    language_choices = list(model_config["language_mapping"].items())
    model_args = model_config["model"].tts_model.config.model_args
    supports_custom_speaker = bool(model_args.speaker_encoder_model_path)

    return (
        gr.update(
            choices=speaker_choices,
            value=speaker_choices[0][1] if speaker_choices else None,
            interactive=len(speaker_choices) > 1,
        ),
        gr.update(
            choices=language_choices,
            value=language_choices[0][1] if language_choices else None,
            interactive=len(language_choices) > 1,
        ),
        gr.update(visible=supports_custom_speaker, value=DEFAULT_SPEAKER_MODE),
    )


def use_default_emb_or_custom_radio_input(use_default_emb_or_custom):
    """Return visibility updates for ``(speaker_wav, speaker_drop_down)``.

    Exactly one of the two widgets is visible: the audio input in custom
    speaker mode, the speaker dropdown otherwise.
    """
    is_custom = use_default_emb_or_custom == CUSTOM_SPEAKER_MODE
    return gr.update(visible=is_custom), gr.update(visible=not is_custom)


def language_radio_changed(language):
    """Return the dialect radio update matching the newly selected language."""
    dialect_choices = _dialect_choices(language)
    return gr.update(
        choices=dialect_choices,
        value=dialect_choices[0][1] if dialect_choices else None,
        interactive=len(dialect_choices) > 1,
        visible=language not in g2p,
    )


def update_example(language):
    """Refresh the examples table for *language*.

    The dialect column's properties are updated in place on the existing
    examples dataset. A new ``gr.Dataset`` is returned only for languages
    that have entries in ``EXAMPLE_SAMPLES``; for any other language the
    function returns ``None``.
    """
    component_props = examples.dataset.component_props
    component_props[0]["visible"] = language not in g2p
    component_props[0]["choices"] = _dialect_choices(language)

    if language not in EXAMPLE_SAMPLES:
        return None
    return gr.Dataset(
        component_props=component_props,
        samples=_example_samples(language),
    )


def get_title():
    """Return the first line of the demo markdown without heading markers."""
    with open(DEMO_MARKDOWN_PATH, encoding="utf-8") as tong:
        # Drop the trailing newline first, then the leading "# " markers.
        return tong.readline().strip().strip("# ")


demo = gr.Blocks(
    title=get_title(),
    css="@import url(https://tauhu.tw/tauhu-oo.css);",
    theme=gr.themes.Default(
        font=(
            "tauhu-oo",
            gr.themes.GoogleFont("Source Sans Pro"),
            "ui-sans-serif",
            "system-ui",
            "sans-serif",
        )
    ),
)

with demo:
    model_ids = list(models_config)
    default_model_id = model_ids[0]
    default_model_config = models_config[default_model_id]

    model_drop_down = gr.Dropdown(
        model_ids,
        value=default_model_id,
        label="模型",
    )

    use_default_emb_or_custom_radio = gr.Radio(
        label="語者類型",
        choices=[DEFAULT_SPEAKER_MODE, CUSTOM_SPEAKER_MODE],
        value=DEFAULT_SPEAKER_MODE,
        visible=True,
        show_label=False,
    )

    speaker_wav = gr.Audio(
        label="客製化語音",
        visible=False,
        editable=False,
        type="filepath",
        waveform_options=gr.WaveformOptions(
            show_controls=False,
            sample_rate=16000,
        ),
    )

    default_speaker_choices = list(default_model_config["speaker_mapping"].items())
    speaker_drop_down = gr.Dropdown(
        choices=default_speaker_choices,
        value=default_speaker_choices[0][1] if default_speaker_choices else None,
        label="語者",
        interactive=len(default_speaker_choices) > 1,
        visible=True,
    )

    use_default_emb_or_custom_radio.change(
        use_default_emb_or_custom_radio_input,
        inputs=[use_default_emb_or_custom_radio],
        outputs=[speaker_wav, speaker_drop_down],
    )

    default_language_choices = list(default_model_config["language_mapping"].items())
    default_language = default_language_choices[0][1]
    language_radio = gr.Radio(
        choices=default_language_choices,
        value=default_language,
        label="語言",
        interactive=len(default_language_choices) > 1,
    )

    # Built with the same helper as ``language_radio_changed`` so the initial
    # state agrees with the callback, including for languages without dialects.
    default_dialect_choices = _dialect_choices(default_language)
    dialect_radio = gr.Radio(
        choices=default_dialect_choices,
        value=default_dialect_choices[0][1] if default_dialect_choices else None,
        label="方言",
        interactive=len(default_dialect_choices) > 1,
        visible=default_language not in g2p,
    )

    model_drop_down.change(
        when_model_selected,
        inputs=[model_drop_down],
        outputs=[speaker_drop_down, language_radio, use_default_emb_or_custom_radio],
    )

    input_text = gr.Textbox(
        label="輸入文字",
        value="",
    )

    speed = gr.Slider(maximum=1.5, minimum=0.5, value=1, label="語速")

    with open(DEMO_MARKDOWN_PATH, encoding="utf-8") as tong:
        gr.Markdown(tong.read())

    gr.Interface(
        text_to_speech,
        inputs=[
            model_drop_down,
            use_default_emb_or_custom_radio,
            speaker_wav,
            speaker_drop_down,
            language_radio,
            dialect_radio,
            speed,
            input_text,
        ],
        outputs=[
            gr.Audio(interactive=False, label="合成語音", show_download_button=True),
        ],
        allow_flagging="auto",
    )

    # Hidden textbox used as the third input of the examples table.
    dummy_chinese_text = gr.Textbox(visible=False, label="中文")

    examples = gr.Examples(
        _example_samples(INITIAL_EXAMPLE_LANGUAGE),
        label="範例",
        inputs=[dialect_radio, input_text, dummy_chinese_text],
        cache_examples=False,
    )

    language_radio.change(
        language_radio_changed, inputs=[language_radio], outputs=[dialect_radio]
    ).then(update_example, inputs=[language_radio], outputs=[examples.dataset])

demo.launch()