"""Gradio demo that converts English text to speech with the MMS-TTS VITS model."""

import gradio as gr
import torch
from transformers import AutoTokenizer, VitsModel

# Checkpoint used for both the tokenizer and the acoustic model.
MODEL_ID = "facebook/mms-tts-eng"

model = VitsModel.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)


def generate_waveform(text: str) -> torch.Tensor:
    """Synthesize speech for *text* and return the raw model waveform.

    Args:
        text: The sentence(s) to speak.

    Returns:
        The ``waveform`` tensor produced by the VITS model for the
        tokenized input (batch dimension first).
    """
    inputs = tokenizer(text, return_tensors="pt")
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        output = model(**inputs).waveform
    return output


def text_to_speech(text: str) -> tuple:
    """Gradio callback: turn *text* into a ``(sample_rate, samples)`` pair.

    Gradio's audio output component does not accept a bare tensor; it needs
    the sampling rate together with a NumPy array of samples.

    Args:
        text: The text entered by the user.

    Returns:
        A tuple ``(sampling_rate, audio)`` where ``audio`` is a NumPy array
        holding the synthesized samples of the single input sentence.

    Raises:
        gr.Error: If *text* is empty or only whitespace.
    """
    if not text or not text.strip():
        # Blank input gives the model nothing to synthesize; surface a
        # readable message in the UI instead of an opaque stack trace.
        raise gr.Error("Please enter some text to convert to speech.")

    waveform = generate_waveform(text)
    # Drop the batch dimension (one input -> one waveform) and hand Gradio
    # a CPU NumPy array.
    audio = waveform.squeeze(0).cpu().numpy()
    return model.config.sampling_rate, audio


demo_text_to_speech = gr.Interface(
    fn=text_to_speech,
    title="Text to speech converter",
    description="Enter a text here!",
    inputs='text',
    outputs='audio',
)

demo_text_to_speech.launch()