ollui's picture
Update app.py
d487733 verified
raw
history blame contribute delete
790 Bytes
import tempfile

import gradio as gr
import soundfile as sf
import torch
from transformers import VitsTokenizer, VitsModel
# Hugging Face Hub identifier of the checkpoint used by this app.
model_id = "facebook/mms-tts-mhr"

# Both objects are created once at import time and reused by every request.
# The two loads are independent of each other.
model = VitsModel.from_pretrained(model_id)
tokenizer = VitsTokenizer.from_pretrained(model_id)
def tts_mari(text):
    """Synthesize speech for *text* and return the path of the resulting WAV file.

    Args:
        text: Text to convert to speech with the module-level ``tokenizer``
            and ``model``.

    Returns:
        Path of a newly created WAV file containing the generated waveform.

    Raises:
        gr.Error: If *text* is empty or contains only whitespace.
    """
    if not text or not text.strip():
        # Nothing to synthesize: show a readable message in the UI instead of
        # sending an empty input through the model.
        raise gr.Error("Vui lòng nhập văn bản Meadow Mari.")

    inputs = tokenizer(text, return_tensors="pt")
    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        output = model(**inputs)
    audio = output.waveform.squeeze().cpu().numpy()

    # Reserve a unique file per call so simultaneous requests cannot overwrite
    # each other's audio. The handle is closed before writing so the path can
    # be reopened by soundfile on every platform. The file is intentionally
    # not deleted here because the caller still has to read it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name

    # Use the rate declared by the loaded checkpoint rather than a literal.
    sf.write(output_path, audio, model.config.sampling_rate)
    return output_path
# UI components: one text box for the input, one audio player for the result.
_text_box = gr.Textbox(label="Nhập văn bản Meadow Mari")
_audio_player = gr.Audio(type="filepath", label="Kết quả TTS")

# Wire the synthesis function to the components.
interface = gr.Interface(
    title="Meadow Mari TTS - Powered by Facebook VITS",
    fn=tts_mari,
    inputs=_text_box,
    outputs=_audio_player,
)

if __name__ == "__main__":
    # Start the web app only when this file is executed as a script.
    interface.launch()