Spaces:

Harveenchadha
/

Vakyansh-Hindi-TTS

Runtime error

App Files Files Community

harveen committed on Mar 26, 2022

Commit

3704e25

1 Parent(s): bcd8ef4

AdD

Browse files

Files changed (2) hide show

ttsv/utils/inference/advanced_tts.py +22 -3
ttsv/utils/inference/run_gradio.py +5 -2

ttsv/utils/inference/advanced_tts.py CHANGED Viewed

@@ -73,6 +73,18 @@ def run_tts(text, lang, args):
 def run_tts_paragraph(args):
     audio_list = []
     if args.split_sentences == 1:
         text = normalize_text(args.text, args.lang)
         split_sentences_list = split_sentences(text, args.lang)
@@ -98,10 +110,17 @@ def load_all_models(args):
     if args.lang not in _TRANSLITERATION_NOT_AVAILABLE_IN:
         engine = XlitEngine(args.lang) # loading translit model globally
-    global text_to_mel
-    global mel_to_wav
-    text_to_mel, mel_to_wav = load_models(args.acoustic, args.vocoder, args.device)
     try:
         args.noise_scale = float(args.noise_scale)

 def run_tts_paragraph(args):
     audio_list = []
+    global text_to_mel
+    global mel_to_wav
+    if args.gender == 'Male':
+        text_to_mel = text_to_mel_list[1]
+        mel_to_wav = mel_to_wav_list[1]
+    else:
+        text_to_mel = text_to_mel_list[0]
+        mel_to_wav = mel_to_wav_list[0]
     if args.split_sentences == 1:
         text = normalize_text(args.text, args.lang)
         split_sentences_list = split_sentences(text, args.lang)
     if args.lang not in _TRANSLITERATION_NOT_AVAILABLE_IN:
         engine = XlitEngine(args.lang) # loading translit model globally
+    global text_to_mel_list
+    global mel_to_wav_list
+    text_to_mel_list = []
+    mel_to_wav_list = []
+    for acoustic, vocoder in zip( args.acoustic.split(',') , args.vocoder.split(',') ):
+        ttm, mtw =  load_models(acoustic, vocoder, args.device)
+        text_to_mel_list.append(ttm)
+        mel_to_wav_list.append(mtw)
     try:
         args.noise_scale = float(args.noise_scale)

ttsv/utils/inference/run_gradio.py CHANGED Viewed

@@ -5,8 +5,9 @@ from argparse import Namespace
 from .advanced_tts import load_all_models, run_tts_paragraph
-def hit_tts(textbox, slider_noise_scale, slider_length_sclae, choice_transliteration, choice_number_conversion, choice_split_sentences):
     inputs_to_gradio = {'text' : textbox,
                         'noise_scale': slider_noise_scale,
                         'length_scale': slider_length_sclae,
                         'transliteration' : 1 if choice_transliteration else 0,
@@ -17,6 +18,7 @@ def hit_tts(textbox, slider_noise_scale, slider_length_sclae, choice_translitera
     args = Namespace(**inputs_to_gradio)
     args.wav = None
     args.lang = lang
     if args.text:
         sr, audio = run_tts_paragraph(args)
@@ -27,6 +29,7 @@ def build_gradio(args):
     lang = args.lang
     load_all_models(args)
     textbox = gr.inputs.Textbox(placeholder="Enter Text to run", default="", label="TTS")
     slider_noise_scale = gr.inputs.Slider(minimum=0, maximum=1.0, step=0.001, default=0.667, label='Enter Noise Scale')
     slider_length_sclae = gr.inputs.Slider(minimum=0, maximum=2.0, step=0.1, default=1.0, label='Enter Slider Scale')
@@ -38,7 +41,7 @@ def build_gradio(args):
     op = gr.outputs.Audio(type="numpy", label=None)
-    inputs_to_gradio = [textbox, slider_noise_scale, slider_length_sclae, choice_transliteration, choice_number_conversion, choice_split_sentences]
     iface = gr.Interface(fn=hit_tts, inputs=inputs_to_gradio, outputs=op, theme='huggingface', title='Vakyansh Hindi TTS', article = 'Note: Transliteration models may not work well in some scenarios which can hamper the TTS quality, to evaluate the model in better sense it is advisable to provide input in the required langauge and switch off transliteration.')
     iface.launch(share=True, enable_queue=True)

 from .advanced_tts import load_all_models, run_tts_paragraph
+def hit_tts(textbox, gender, slider_noise_scale, slider_length_sclae, choice_transliteration, choice_number_conversion, choice_split_sentences):
     inputs_to_gradio = {'text' : textbox,
+                        'gender' : gender,
                         'noise_scale': slider_noise_scale,
                         'length_scale': slider_length_sclae,
                         'transliteration' : 1 if choice_transliteration else 0,
     args = Namespace(**inputs_to_gradio)
     args.wav = None
     args.lang = lang
+    args.gender = gender
     if args.text:
         sr, audio = run_tts_paragraph(args)
     lang = args.lang
     load_all_models(args)
     textbox = gr.inputs.Textbox(placeholder="Enter Text to run", default="", label="TTS")
+    gender = gr.inputs.Dropdown(choices = ['Female', 'Male'], default='Female', label='Gender')
     slider_noise_scale = gr.inputs.Slider(minimum=0, maximum=1.0, step=0.001, default=0.667, label='Enter Noise Scale')
     slider_length_sclae = gr.inputs.Slider(minimum=0, maximum=2.0, step=0.1, default=1.0, label='Enter Slider Scale')
     op = gr.outputs.Audio(type="numpy", label=None)
+    inputs_to_gradio = [textbox, gender, slider_noise_scale, slider_length_sclae, choice_transliteration, choice_number_conversion, choice_split_sentences]
     iface = gr.Interface(fn=hit_tts, inputs=inputs_to_gradio, outputs=op, theme='huggingface', title='Vakyansh Hindi TTS', article = 'Note: Transliteration models may not work well in some scenarios which can hamper the TTS quality, to evaluate the model in better sense it is advisable to provide input in the required langauge and switch off transliteration.')
     iface.launch(share=True, enable_queue=True)