|
import tempfile ,os |
|
import gradio as gr |
|
from transformers import VitsModel, AutoTokenizer |
|
import torch |
|
import numpy as np |
|
import torchaudio |
|
|
|
# Identifier of the pretrained checkpoint; the same id is used for both the
# VITS model weights and its tokenizer so the two always match.
_CHECKPOINT = "SeyedAli/Arabic-Speech-synthesis"

# Loaded once at import time so every synthesis request reuses the same
# tokenizer and model objects.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = VitsModel.from_pretrained(_CHECKPOINT)
|
|
|
def TTS(text):
    """Synthesize speech for ``text`` and return the path of a WAV file.

    The text is tokenized with the module-level ``tokenizer``, run through
    the module-level ``model`` without gradient tracking, and the resulting
    waveform is written to a fresh temporary ``.wav`` file at the model's
    configured sampling rate.

    Args:
        text: The text to convert to speech.

    Returns:
        The filesystem path of the generated ``.wav`` file, or ``None`` when
        ``text`` is empty or contains only whitespace (nothing to speak).

    Raises:
        Exception: Whatever the tokenizer, model, or ``torchaudio.save``
            raises; a partially written temp file is removed first.
    """
    # Guard: there is nothing to synthesize for blank input, so skip the
    # model call entirely instead of feeding it an empty sequence.
    if not text or not text.strip():
        return None

    inputs = tokenizer(text, return_tensors="pt")

    # Inference only: no gradients are needed.
    with torch.no_grad():
        # NOTE(review): presumably shaped (batch, samples), which is what
        # torchaudio.save treats as (channels, time) -- confirm.
        waveform = model(**inputs).waveform

    # Reserve a unique path, then close the handle before writing. Saving by
    # path (rather than through the open file object) works with every
    # torchaudio backend and avoids double-open problems on Windows.
    # delete=False keeps the file alive so the caller can read it later.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        wav_path = fp.name

    try:
        torchaudio.save(
            wav_path,
            waveform,
            model.config.sampling_rate,
            format="wav",
        )
    except Exception:
        # Do not leave an orphaned (empty or partial) temp file behind.
        os.remove(wav_path)
        raise

    return wav_path
|
# Wrap TTS in a minimal web UI: one text input, one audio output.
iface = gr.Interface(
    fn=TTS,
    inputs="text",
    outputs="audio",
)

# Start serving the interface; share=False is passed explicitly.
iface.launch(share=False)