"""Gradio demo app: Karachay-Malkar text-to-speech backed by a Silero TTS model."""

import os
import random
import string
import tempfile

import gradio as gr
import torch

# Model / synthesis configuration.
language = 'cyrillic'
model_id = 'v4_cyrillic'
sample_rate = 48000
speaker = 'b_krc'
device = torch.device('cpu')

# Load the model through torch.hub when the module is imported. Note that the
# hub entry point receives the model id through its `speaker` keyword, while
# the voice used for synthesis is the module-level `speaker` defined above.
model, example_text = torch.hub.load(
    repo_or_dir='snakers4/silero-models',
    model='silero_tts',
    language=language,
    speaker=model_id,
)
model.to(device)  # gpu or cpu


def tts(text):
    """Synthesize *text* to a WAV file and return the file's path.

    Args:
        text: The text to speak.

    Returns:
        Path of the generated WAV file, located in the system temp directory.

    Raises:
        gr.Error: If *text* is empty or contains only whitespace.
    """
    if not text or not text.strip():
        # Surface a readable message in the UI instead of a model traceback.
        raise gr.Error("Please enter some text to synthesize.")

    # mkstemp reserves a unique file name atomically, so concurrent requests
    # cannot collide, and the app's working directory is not filled with
    # generated audio files.
    fd, audio_path = tempfile.mkstemp(suffix='.wav')
    os.close(fd)  # the model writes to the path itself; only the name is needed

    try:
        model.save_wav(
            audio_path=audio_path,
            text=text,
            speaker=speaker,
            sample_rate=sample_rate,
        )
    except Exception:
        # Do not leave an empty placeholder file behind when synthesis fails.
        try:
            os.remove(audio_path)
        except OSError:
            pass
        raise

    return audio_path


iface = gr.Interface(
    fn=tts,
    inputs=gr.Textbox(
        label="Text",
        value="Кюн ахшы болсун",
    ),
    outputs=gr.Audio(label="Output", type='filepath'),
    title="Karachay-Malkar TTS",
    live=False,
)

iface.launch(share=False)