# Ttsjmong / app.py
# Mynameisju's picture
# Update app.py
# 5347141 verified
# raw
# history blame contribute delete
# 393 Bytes
from transformers import AutoProcessor, VitsModel
import torch
import soundfile as sf
# Text-to-speech script: load a pretrained VITS checkpoint, synthesize one
# sentence, and save the resulting audio as a WAV file.

# Checkpoint identifier passed to both the processor and the model.
MODEL_ID = "facebook/mms-tts-hmn"
# Sentence to synthesize.
TEXT = "Kuv hlub koj"
# Destination of the generated audio.
OUTPUT_PATH = "output.wav"

processor = AutoProcessor.from_pretrained(MODEL_ID)
model = VitsModel.from_pretrained(MODEL_ID)

# Convert the text into PyTorch tensors the model accepts as keyword args.
inputs = processor(text=TEXT, return_tensors="pt")

# Inference only: skip autograd bookkeeping while running the model.
with torch.no_grad():
    speech = model(**inputs).waveform

# The first index selects the first (and only) item of the batch. Moving the
# tensor to CPU first is a no-op on CPU and keeps `.numpy()` valid if the
# model is ever placed on another device.
audio = speech[0].cpu().numpy()

# Take the sample rate from the checkpoint's config instead of hard-coding
# it, so the file plays at the correct speed if MODEL_ID is changed.
sf.write(OUTPUT_PATH, audio, samplerate=model.config.sampling_rate)