# Jobanpreet's picture
# Update app.py
# ab655eb
from tts_infer.tts import TextToMel, MelToWav
from tts_infer.num_to_word_on_sent import normalize_nums
# from ai4bharat.transliteration import XlitEngine
import gradio as gr
# Device passed as `device=` to the TextToMel and MelToWav constructors below.
device = 'cpu'
def create_text_to_mel(glow_model_dir):
    """Build a ``TextToMel`` instance for the given model directory.

    Args:
        glow_model_dir: Forwarded unchanged as the ``glow_model_dir``
            argument of ``TextToMel``.

    Returns:
        The ``TextToMel`` object, constructed with the module-level
        ``device`` setting.
    """
    text_to_mel = TextToMel(glow_model_dir=glow_model_dir, device=device)
    return text_to_mel
# Models are created once at module level; run_tts reuses them on every call.

# Female voice: text -> mel model, then mel -> waveform model.
text_to_mel_female = create_text_to_mel(glow_model_dir='checkpoints/glow/female')
mel_to_wav_female = MelToWav(
    hifi_model_dir='checkpoints/hifi/female',
    device=device,
)

# Male voice: same pair of models, loaded from the male checkpoints.
text_to_mel_male = create_text_to_mel(glow_model_dir='checkpoints/glow/male')
mel_to_wav_male = MelToWav(
    hifi_model_dir='checkpoints/hifi/male',
    device=device,
)
# def translit(text, lang):
# engine = XlitEngine(lang)
# words = [engine.translit_word(word, topk=1)[lang][0] for word in text.split()]
# updated_sent = ' '.join(words)
# return updated_sent
def run_tts(text, selected_voice):
    """Synthesize speech for ``text`` using the selected voice.

    Args:
        text: Input text. Every '।' character is replaced with '.' before
            numbers are converted to words.
        selected_voice: ``"Male Voice"`` selects the male models; any other
            value selects the female models.

    Returns:
        A ``(sr, audio)`` tuple: the two values returned by
        ``generate_wav``, in swapped order.
    """
    language = "pa"  # Punjabi language code

    # Replace the danda with a full stop, then convert numbers to words.
    normalized = normalize_nums(text.replace('।', '.'), language)
    # Transliteration of English words is disabled (the translit helper is
    # commented out in this file).

    use_male = selected_voice == "Male Voice"
    mel_model, wav_model = (
        (text_to_mel_male, mel_to_wav_male)
        if use_male
        else (text_to_mel_female, mel_to_wav_female)
    )

    audio, sr = wav_model.generate_wav(mel_model.generate_mel(normalized))
    return sr, audio
# iface = gr.Interface(
# fn=run_tts,
# inputs=[
# "textbox",
# gr.inputs.Dropdown(
# choices=["Male Voice", "Female Voice"],
# default="Female Voice",
# label="Select Voice"
# )
# ],
# outputs="audio",
# title="Text to Speech Punjabi Language"
# )
# iface.launch()
# Gradio UI: a text box and a voice selector feed run_tts; its return value
# is shown through the audio output component.
iface = gr.Interface(
    fn=run_tts,
    inputs=[
        gr.Textbox(label="Enter Text"),
        gr.Radio(
            choices=["Male Voice", "Female Voice"],
            # Pre-select the voice run_tts already falls back to when nothing
            # is chosen, so the UI shows which voice will actually be used.
            value="Female Voice",
            label="Select Voice",
        ),
    ],
    outputs=gr.Audio(label="Generated Audio"),
    title="Text to Speech Punjabi Language",
)
iface.launch()