"""Gradio demo that synthesises speech with the ``bartelds/grotts_vits`` model.

The script loads one ESPnet2 ``Text2Speech`` model at import time and exposes
it through a small Gradio interface: the user types a sentence, picks one of
the listed varieties, and gets the synthesised audio back both as a playable
clip and as a downloadable file.
"""

import os
import time
import urllib.request
from pathlib import Path

import gradio as gr
import numpy as np
import scipy.io.wavfile
import torch
from espnet2.bin.tts_inference import Text2Speech
from espnet2.utils.types import str_or_none

# One model serves every variety; the variety is selected per request by the
# ``sids`` value passed to the model (see SPEAKER_IDS below).
gos_text2speech = Text2Speech.from_pretrained(
    model_tag="bartelds/grotts_vits",
    vocoder_tag="none",
    device="cpu",
    speed_control_alpha=1.0,
    noise_scale=1.0,
    noise_scale_dur=1.0,
)

# File the synthesised audio is written to. Every request overwrites it.
# NOTE(review): with queued/concurrent requests two calls could race on this
# path -- confirm whether a per-request file is needed.
OUTPUT_PATH = "out.wav"

# Variety label -> ``sids`` value handed to the model.
# "Westerkwartiers" is the label the original inference code checked for,
# while the radio button below offers "Westerkertaaiers"; both are accepted so
# the UI option works and existing callers of ``inference`` keep working.
SPEAKER_IDS = {
    "Hoogelaandsters": 1,
    "Oldambsters": 2,
    "Westerkwartiers": 3,
    "Westerkertaaiers": 3,
}


def inference(text, lang):
    """Synthesise ``text`` in the requested variety and write it to a WAV file.

    Args:
        text: The sentence to synthesise.
        lang: Variety label; must be one of the keys of ``SPEAKER_IDS``.

    Returns:
        A ``(path, path)`` tuple holding the output WAV path twice, once for
        the audio player output and once for the file-download output.

    Raises:
        ValueError: If ``lang`` is not a known variety label. Previously an
            unknown label silently returned whatever ``out.wav`` was already
            on disk.
    """
    try:
        speaker_id = SPEAKER_IDS[lang]
    except KeyError:
        known = ", ".join(sorted(SPEAKER_IDS))
        raise ValueError(
            f"Unknown language {lang!r}; expected one of: {known}"
        ) from None

    # Inference only: no gradients are needed.
    with torch.no_grad():
        wav = gos_text2speech(text, sids=np.array([speaker_id]))["wav"]

    # All varieties come from the same model, so its sampling rate applies to
    # every output (the original referenced undefined nld_/eng_ models here).
    scipy.io.wavfile.write(
        OUTPUT_PATH,
        gos_text2speech.fs,
        wav.view(-1).cpu().numpy(),
    )
    return OUTPUT_PATH, OUTPUT_PATH


title = "GroTTS"

examples = [
    [
        'Ze gingen mit klas noar waddendiek, over en deur bragel lopen.',
        'Hoogelaandsters',
    ],
]

gr.Interface(
    inference,
    [
        gr.inputs.Textbox(label="input text", lines=3),
        gr.inputs.Radio(
            # Every choice offered here must be a key of SPEAKER_IDS.
            choices=["Hoogelaandsters", "Oldambsters", "Westerkertaaiers"],
            type="value",
            default="Hoogelaandsters",
            label="language",
        ),
    ],
    [
        gr.outputs.Audio(type="file", label="Output"),
        gr.outputs.File(),
    ],
    title=title,
    examples=examples,
).launch(enable_queue=True)