Mynameisju committed on
Commit
5347141
·
verified ·
1 Parent(s): f99ee34

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -28
app.py CHANGED
@@ -1,33 +1,12 @@
 
1
  import torch
2
- from transformers import VITSModel, AutoProcessor
3
- import gradio as gr
4
- import scipy.io.wavfile
5
 
6
- # Load model and processor
7
- model = VITSModel.from_pretrained("facebook/mms-tts-hmn")
8
  processor = AutoProcessor.from_pretrained("facebook/mms-tts-hmn")
 
9
 
10
- # Set model to eval mode
11
- model.eval()
 
12
 
13
- def tts(text):
14
- # Preprocess input
15
- inputs = processor(text, return_tensors="pt")
16
-
17
- # Generate audio
18
- with torch.no_grad():
19
- output = model(**inputs)
20
- audio = output.waveform[0].numpy()
21
-
22
- # Convert to 16-bit PCM WAV for Gradio playback
23
- sample_rate = model.config.sampling_rate
24
- return (sample_rate, audio)
25
-
26
- # Gradio UI
27
- gr.Interface(
28
- fn=tts,
29
- inputs=gr.Textbox(label="Nhập văn bản tiếng H'Mông"),
30
- outputs=gr.Audio(label="Phát âm"),
31
- title="Text-to-Speech tiếng H'Mông (TTS)",
32
- description="TTS sử dụng mô hình facebook/mms-tts-hmn (Meta MMS VITS)."
33
- ).launch()
 
1
+ from transformers import AutoProcessor, VitsModel
2
  import torch
3
+ import soundfile as sf
 
 
4
 
 
 
5
  processor = AutoProcessor.from_pretrained("facebook/mms-tts-hmn")
6
+ model = VitsModel.from_pretrained("facebook/mms-tts-hmn")
7
 
8
+ inputs = processor(text="Kuv hlub koj", return_tensors="pt")
9
+ with torch.no_grad():
10
+ speech = model(**inputs).waveform
11
 
12
+ sf.write("output.wav", speech.numpy()[0], samplerate=16000)