File size: 589 Bytes
41aa20c
9b71340
 
 
3a96603
9b71340
 
 
5ed2c8c
8823536
9b71340
 
8823536
9b71340
 
 
 
 
5ed2c8c
9b71340
8823536
 
a31caa9
8823536
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import gradio as gr
import scipy
from transformers import VitsModel, AutoTokenizer
import torch

# Hugging Face Hub checkpoint shared by the model and its tokenizer.
MODEL_ID = "facebook/mms-tts-crh"

# Loaded once at import time so every request reuses the same instances.
model = VitsModel.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)


def predict(image):
    """Synthesize speech for the submitted text with the loaded VITS model.

    Args:
        image: The string typed into the Gradio textbox. The parameter name
            is kept for backward compatibility; the value is text, not an
            image.

    Returns:
        A ``(sampling_rate, waveform)`` tuple, the format accepted by
        Gradio's ``"audio"`` output: the model's sampling rate and the
        generated waveform as a 1-D numpy array.

    Raises:
        gr.Error: If the submitted text is empty or whitespace only.
    """
    # The original read an undefined name here; the text is the argument.
    text = image.strip() if image else ""
    if not text:
        # Surface a readable message in the UI instead of a tokenizer/model
        # failure on empty input.
        raise gr.Error("Please enter some text to synthesize.")

    inputs = tokenizer(text, return_tensors="pt")

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        waveform = model(**inputs).waveform

    # The model returns a (batch, samples) tensor; drop the batch axis and
    # hand Gradio a numpy array together with the sample rate.
    return model.config.sampling_rate, waveform.squeeze().cpu().numpy()

# Build the web UI: one textbox in, synthesized audio out. The label and
# title describe the text-to-speech task (the previous strings were leftovers
# from unrelated demo templates).
demo = gr.Interface(
    predict,
    inputs=gr.Textbox(value="", label="Text to synthesize"),
    outputs="audio",
    title="Crimean Tatar Text-to-Speech (MMS-TTS)",
)
demo.launch()