"""Gradio demo: Punjabi text-to-speech with a selectable male or female voice."""

from tts_infer.tts import TextToMel, MelToWav
from tts_infer.num_to_word_on_sent import normalize_nums
# from ai4bharat.transliteration import XlitEngine

import gradio as gr

device = 'cpu'

# Danda (U+0964), the Indic sentence terminator.  Written as an escape so the
# literal cannot be mangled again by an editor or pipeline that mis-decodes the
# file (the previous literal was the CP866 mojibake of this character and
# therefore never matched real input).
_DANDA = "\u0964"

MALE_VOICE = "Male Voice"
FEMALE_VOICE = "Female Voice"


def create_text_to_mel(glow_model_dir):
    """Build a ``TextToMel`` model from ``glow_model_dir`` on the module ``device``."""
    return TextToMel(glow_model_dir=glow_model_dir, device=device)


# Both voices are loaded once at import time so each request only runs inference.
text_to_mel_female = create_text_to_mel('checkpoints/glow/female')
mel_to_wav_female = MelToWav(hifi_model_dir='checkpoints/hifi/female', device=device)

text_to_mel_male = create_text_to_mel('checkpoints/glow/male')
mel_to_wav_male = MelToWav(hifi_model_dir='checkpoints/hifi/male', device=device)


# Transliteration of English words is currently disabled; kept for reference.
# def translit(text, lang):
#     engine = XlitEngine(lang)
#     words = [engine.translit_word(word, topk=1)[lang][0] for word in text.split()]
#     updated_sent = ' '.join(words)
#     return updated_sent


def run_tts(text, selected_voice):
    """Synthesize speech for ``text`` with the selected voice.

    Args:
        text: Input text to speak.
        selected_voice: ``"Male Voice"`` selects the male models; any other
            value (including ``None``) selects the female models.

    Returns:
        A ``(sample_rate, audio)`` tuple, the order ``gr.Audio`` accepts for
        its output value.
    """
    lang = "pa"  # Punjabi language code

    # Replace the danda with a full stop before number normalization and
    # synthesis.
    text = text.replace(_DANDA, '.')

    # Convert numbers to words in `lang`.
    text_num_to_word = normalize_nums(text, lang)
    # text_num_to_word_and_transliterated = translit(text_num_to_word, lang)

    if selected_voice == MALE_VOICE:
        text_to_mel = text_to_mel_male
        mel_to_wav = mel_to_wav_male
    else:
        text_to_mel = text_to_mel_female
        mel_to_wav = mel_to_wav_female

    mel = text_to_mel.generate_mel(text_num_to_word)
    audio, sr = mel_to_wav.generate_wav(mel)
    return sr, audio


iface = gr.Interface(
    fn=run_tts,
    inputs=[
        gr.Textbox(label="Enter Text"),
        gr.Radio(
            choices=[MALE_VOICE, FEMALE_VOICE],
            # Pre-select the voice run_tts falls back to, so the UI shows
            # what will actually be used when the user does not choose.
            value=FEMALE_VOICE,
            label="Select Voice",
        ),
    ],
    outputs=gr.Audio(label="Generated Audio"),
    title="Text to Speech Punjabi Language",
)

iface.launch()