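# Page header captured together with this file (not part of the program):
#   author avatar: "SeyedAli's picture"
#   commit: "Update app.py" (5b3e648)
#   viewer links: raw, history, blame
#   size: 705 Bytes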
import tempfile ,os
import gradio as gr
from transformers import VitsModel, AutoTokenizer,pipeline
import torch
import numpy as np
import torchaudio
_TTS_MODEL_ID = "SeyedAli/Persian-Speech-synthesis"
_tts_pipeline = None  # built on first use, then reused for every request


def _get_tts_pipeline():
    """Return the shared text-to-speech pipeline, loading it on first use."""
    global _tts_pipeline
    if _tts_pipeline is None:
        model = VitsModel.from_pretrained(_TTS_MODEL_ID)
        tokenizer = AutoTokenizer.from_pretrained(_TTS_MODEL_ID)
        _tts_pipeline = pipeline(
            "text-to-speech", model=model, tokenizer=tokenizer
        )
    return _tts_pipeline


def TTS(text):
    """Synthesize speech for *text* and return the path of a WAV file.

    Args:
        text: The text to synthesize. Must contain at least one
            non-whitespace character.

    Returns:
        Path of a newly created temporary ``.wav`` file. The file is created
        with ``delete=False``, so it is not removed automatically.

    Raises:
        ValueError: If *text* is empty, ``None`` or whitespace only.
    """
    if not text or not text.strip():
        raise ValueError("text must be a non-empty string")

    # Run synthesis once and reuse the result for both audio and sample rate.
    result = _get_tts_pipeline()(text)

    # torchaudio.save needs a 2-D float tensor; atleast_2d adds the leading
    # axis when the pipeline returns a 1-D array.
    samples = np.atleast_2d(np.asarray(result["audio"], dtype=np.float32))
    waveform = torch.from_numpy(samples)

    # Reserve a unique file name, close the handle, then write by path so the
    # audio backend can infer the format from the ".wav" suffix.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        wav_path = fp.name
    torchaudio.save(wav_path, waveform, sample_rate=int(result["sampling_rate"]))
    return wav_path
# Gradio UI: one text input wired to TTS, with the returned file path
# rendered by the audio output component.
iface = gr.Interface(
    fn=TTS,
    inputs="text",
    outputs="audio",
)
# Start the web server without requesting a public share link.
iface.launch(share=False)