harveen committed on
Commit
3704e25
·
1 Parent(s): bcd8ef4
ttsv/utils/inference/advanced_tts.py CHANGED
@@ -73,6 +73,18 @@ def run_tts(text, lang, args):
73
 
74
  def run_tts_paragraph(args):
75
  audio_list = []
 
 
 
 
 
 
 
 
 
 
 
 
76
  if args.split_sentences == 1:
77
  text = normalize_text(args.text, args.lang)
78
  split_sentences_list = split_sentences(text, args.lang)
@@ -98,10 +110,17 @@ def load_all_models(args):
98
  if args.lang not in _TRANSLITERATION_NOT_AVAILABLE_IN:
99
  engine = XlitEngine(args.lang) # loading translit model globally
100
 
101
- global text_to_mel
102
- global mel_to_wav
103
 
104
- text_to_mel, mel_to_wav = load_models(args.acoustic, args.vocoder, args.device)
 
 
 
 
 
 
 
105
 
106
  try:
107
  args.noise_scale = float(args.noise_scale)
 
73
 
74
  def run_tts_paragraph(args):
75
  audio_list = []
76
+
77
+ global text_to_mel
78
+ global mel_to_wav
79
+
80
+ if args.gender == 'Male':
81
+ text_to_mel = text_to_mel_list[1]
82
+ mel_to_wav = mel_to_wav_list[1]
83
+ else:
84
+ text_to_mel = text_to_mel_list[0]
85
+ mel_to_wav = mel_to_wav_list[0]
86
+
87
+
88
  if args.split_sentences == 1:
89
  text = normalize_text(args.text, args.lang)
90
  split_sentences_list = split_sentences(text, args.lang)
 
110
  if args.lang not in _TRANSLITERATION_NOT_AVAILABLE_IN:
111
  engine = XlitEngine(args.lang) # loading translit model globally
112
 
113
+ global text_to_mel_list
114
+ global mel_to_wav_list
115
 
116
+
117
+ text_to_mel_list = []
118
+ mel_to_wav_list = []
119
+
120
+ for acoustic, vocoder in zip( args.acoustic.split(',') , args.vocoder.split(',') ):
121
+ ttm, mtw = load_models(acoustic, vocoder, args.device)
122
+ text_to_mel_list.append(ttm)
123
+ mel_to_wav_list.append(mtw)
124
 
125
  try:
126
  args.noise_scale = float(args.noise_scale)
ttsv/utils/inference/run_gradio.py CHANGED
@@ -5,8 +5,9 @@ from argparse import Namespace
5
  from .advanced_tts import load_all_models, run_tts_paragraph
6
 
7
 
8
- def hit_tts(textbox, slider_noise_scale, slider_length_sclae, choice_transliteration, choice_number_conversion, choice_split_sentences):
9
  inputs_to_gradio = {'text' : textbox,
 
10
  'noise_scale': slider_noise_scale,
11
  'length_scale': slider_length_sclae,
12
  'transliteration' : 1 if choice_transliteration else 0,
@@ -17,6 +18,7 @@ def hit_tts(textbox, slider_noise_scale, slider_length_sclae, choice_translitera
17
  args = Namespace(**inputs_to_gradio)
18
  args.wav = None
19
  args.lang = lang
 
20
 
21
  if args.text:
22
  sr, audio = run_tts_paragraph(args)
@@ -27,6 +29,7 @@ def build_gradio(args):
27
  lang = args.lang
28
  load_all_models(args)
29
  textbox = gr.inputs.Textbox(placeholder="Enter Text to run", default="", label="TTS")
 
30
  slider_noise_scale = gr.inputs.Slider(minimum=0, maximum=1.0, step=0.001, default=0.667, label='Enter Noise Scale')
31
  slider_length_sclae = gr.inputs.Slider(minimum=0, maximum=2.0, step=0.1, default=1.0, label='Enter Slider Scale')
32
 
@@ -38,7 +41,7 @@ def build_gradio(args):
38
 
39
  op = gr.outputs.Audio(type="numpy", label=None)
40
 
41
- inputs_to_gradio = [textbox, slider_noise_scale, slider_length_sclae, choice_transliteration, choice_number_conversion, choice_split_sentences]
42
  iface = gr.Interface(fn=hit_tts, inputs=inputs_to_gradio, outputs=op, theme='huggingface', title='Vakyansh Hindi TTS', article = 'Note: Transliteration models may not work well in some scenarios which can hamper the TTS quality, to evaluate the model in better sense it is advisable to provide input in the required langauge and switch off transliteration.')
43
  iface.launch(share=True, enable_queue=True)
44
 
 
5
  from .advanced_tts import load_all_models, run_tts_paragraph
6
 
7
 
8
+ def hit_tts(textbox, gender, slider_noise_scale, slider_length_sclae, choice_transliteration, choice_number_conversion, choice_split_sentences):
9
  inputs_to_gradio = {'text' : textbox,
10
+ 'gender' : gender,
11
  'noise_scale': slider_noise_scale,
12
  'length_scale': slider_length_sclae,
13
  'transliteration' : 1 if choice_transliteration else 0,
 
18
  args = Namespace(**inputs_to_gradio)
19
  args.wav = None
20
  args.lang = lang
21
+ args.gender = gender
22
 
23
  if args.text:
24
  sr, audio = run_tts_paragraph(args)
 
29
  lang = args.lang
30
  load_all_models(args)
31
  textbox = gr.inputs.Textbox(placeholder="Enter Text to run", default="", label="TTS")
32
+ gender = gr.inputs.Dropdown(choices = ['Female', 'Male'], default='Female', label='Gender')
33
  slider_noise_scale = gr.inputs.Slider(minimum=0, maximum=1.0, step=0.001, default=0.667, label='Enter Noise Scale')
34
  slider_length_sclae = gr.inputs.Slider(minimum=0, maximum=2.0, step=0.1, default=1.0, label='Enter Slider Scale')
35
 
 
41
 
42
  op = gr.outputs.Audio(type="numpy", label=None)
43
 
44
+ inputs_to_gradio = [textbox, gender, slider_noise_scale, slider_length_sclae, choice_transliteration, choice_number_conversion, choice_split_sentences]
45
  iface = gr.Interface(fn=hit_tts, inputs=inputs_to_gradio, outputs=op, theme='huggingface', title='Vakyansh Hindi TTS', article = 'Note: Transliteration models may not work well in some scenarios which can hamper the TTS quality, to evaluate the model in better sense it is advisable to provide input in the required langauge and switch off transliteration.')
46
  iface.launch(share=True, enable_queue=True)
47