|
from tts_infer.tts import TextToMel, MelToWav |
|
from tts_infer.num_to_word_on_sent import normalize_nums |
|
|
|
import gradio as gr |
|
|
|
|
|
# Device identifier handed to every TextToMel / MelToWav constructor in this
# file, so all models are created on the same device.
device = 'cpu'
|
|
|
def create_text_to_mel(glow_model_dir):
    """Build a ``TextToMel`` model from *glow_model_dir*.

    Args:
        glow_model_dir: Model directory forwarded unchanged to
            ``TextToMel`` as its ``glow_model_dir`` argument.

    Returns:
        The ``TextToMel`` instance, created on the module-level ``device``.
    """
    return TextToMel(glow_model_dir=glow_model_dir, device=device)
|
|
|
# Both voice pipelines are instantiated once, at import time, and then reused
# by every call to run_tts.

# Female voice: text -> mel model and mel -> waveform model.
text_to_mel_female = create_text_to_mel('checkpoints/glow/female')
mel_to_wav_female = MelToWav(hifi_model_dir='checkpoints/hifi/female', device=device)

# Male voice: same pair of models, loaded from the male checkpoints.
text_to_mel_male = create_text_to_mel('checkpoints/glow/male')
mel_to_wav_male = MelToWav(hifi_model_dir='checkpoints/hifi/male', device=device)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def run_tts(text, selected_voice):
    """Synthesize speech for *text* using the chosen voice.

    Args:
        text: Input string. Every danda character ('।') is replaced with
            '.', then the text is passed through ``normalize_nums`` with
            the language code ``"pa"`` before synthesis.
        selected_voice: ``"Male Voice"`` selects the male models; any other
            value (including ``None``) falls back to the female models.

    Returns:
        A ``(sr, audio)`` tuple: the two values returned by
        ``generate_wav`` in swapped order, which is the (sample rate, data)
        order a ``gr.Audio`` output accepts.
    """
    lang = "pa"

    # Replace the danda with '.' before number normalization.
    text = text.replace('।', '.')
    text_num_to_word = normalize_nums(text, lang)

    # Pick the preloaded model pair for the requested voice; female is the
    # fallback for anything that is not exactly "Male Voice".
    if selected_voice == "Male Voice":
        text_to_mel = text_to_mel_male
        mel_to_wav = mel_to_wav_male
    else:
        text_to_mel = text_to_mel_female
        mel_to_wav = mel_to_wav_female

    mel = text_to_mel.generate_mel(text_num_to_word)
    audio, sr = mel_to_wav.generate_wav(mel)
    return sr, audio
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Gradio UI: a text box and a voice selector are passed, in that order, to
# run_tts(text, selected_voice); its return value is rendered as audio.
iface = gr.Interface(
    fn=run_tts,
    inputs=[
        gr.Textbox(label="Enter Text"),
        gr.Radio(
            # The "Male Voice" label must stay in sync with the comparison
            # inside run_tts.
            choices=["Male Voice", "Female Voice"],
            label="Select Voice"
        )
    ],
    outputs=gr.Audio(label="Generated Audio"),
    title="Text to Speech Punjabi Language"
)

# Started unconditionally when this module is executed or imported.
iface.launch()
|
|
|
|
|
|