import os

# These variables are assigned before malaya_speech is imported, so the order
# of the statements below is deliberate and must be kept.
# NOTE(review): an empty CUDA_VISIBLE_DEVICES is the usual way to hide every
# GPU from CUDA (i.e. CPU-only inference) -- confirm that is the intent here.
os.environ['CUDA_VISIBLE_DEVICES'] = ''
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

import malaya_speech
from malaya_speech.utils.astype import float_to_int
import gradio as gr
import logging
import json

logging.basicConfig(level=logging.INFO)

# Cache of already-loaded VITS models, keyed by model name, so each model is
# loaded at most once per process.
MODELS = {}

# Model names offered in the dropdown: the index of the table returned by
# malaya_speech.tts.available_vits().
AVAILABLE_MODELS = malaya_speech.tts.available_vits().index.tolist()


def tts(text, model, temperature, length_ratio):
    """Synthesize speech for *text* with the selected VITS model.

    The model is loaded on first use and then reused from ``MODELS``.

    Args:
        text: Text to synthesize.
        model: Name of the VITS model, passed to ``malaya_speech.tts.vits``.
        temperature: Forwarded unchanged to the model's ``predict``.
        length_ratio: Forwarded unchanged to the model's ``predict``.

    Returns:
        A ``(22050, data)`` tuple, where ``data`` is the ``'y'`` entry of the
        prediction passed through ``float_to_int``. The tuple is handed to the
        Gradio ``'audio'`` output (presumably sample rate in Hz plus integer
        samples -- verify against the model's actual output rate).
    """
    synthesizer = MODELS.get(model)
    if synthesizer is None:
        # First request for this model: load it and remember it.
        synthesizer = malaya_speech.tts.vits(model=model)
        MODELS[model] = synthesizer

    prediction = synthesizer.predict(
        text,
        temperature=temperature,
        length_ratio=length_ratio,
    )
    return 22050, float_to_int(prediction['y'])


# Input widgets, in the same order as the parameters of tts().
INPUT_COMPONENTS = [
    gr.components.Textbox(label='Text'),
    gr.components.Dropdown(
        label='Available models',
        choices=AVAILABLE_MODELS,
        value='mesolitica/VITS-osman',
    ),
    gr.Slider(
        0.0,
        1.0,
        value=0.6666,
        label='temperature, changing this will manipulate pitch',
    ),
    gr.Slider(
        0.0,
        3.0,
        value=1.0,
        label='length ratio, changing this will manipulate duration output',
    ),
]

# Example rows shown in the UI; each row is [text, model, temperature,
# length_ratio], matching INPUT_COMPONENTS.
EXAMPLE_ROWS = [
    [
        'Syed Saddiq berkata, mereka seharusnya mengingati bahawa semasa menjadi Perdana Menteri Pakatan Harapan',
        'mesolitica/VITS-osman',
        0.6666,
        1.0,
    ],
    [
        'SHAH ALAM - Pertubuhan Kebajikan Anak Bersatu Selangor bersetuju pihak kerajaan mewujudkan Suruhanjaya Siasatan Diraja untuk menyiasat isu kartel daging.',
        'mesolitica/VITS-haqkiem',
        0.6666,
        1.0,
    ],
]

demo = gr.Interface(
    fn=tts,
    inputs=INPUT_COMPONENTS,
    outputs=['audio'],
    examples=EXAMPLE_ROWS,
    # Examples are not pre-computed when the interface is built.
    cache_examples=False,
    title='End-to-End TTS using VITS from Mesolitica',
)

# Started at import time, as in the original script; '0.0.0.0' is passed as the
# server host name.
demo.launch(server_name='0.0.0.0')