Spaces:
Runtime error
Runtime error
harveen
committed on
Commit
·
3704e25
1
Parent(s):
bcd8ef4
AdD
Browse files
ttsv/utils/inference/advanced_tts.py
CHANGED
@@ -73,6 +73,18 @@ def run_tts(text, lang, args):
|
|
73 |
|
74 |
def run_tts_paragraph(args):
|
75 |
audio_list = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
if args.split_sentences == 1:
|
77 |
text = normalize_text(args.text, args.lang)
|
78 |
split_sentences_list = split_sentences(text, args.lang)
|
@@ -98,10 +110,17 @@ def load_all_models(args):
|
|
98 |
if args.lang not in _TRANSLITERATION_NOT_AVAILABLE_IN:
|
99 |
engine = XlitEngine(args.lang) # loading translit model globally
|
100 |
|
101 |
-
global
|
102 |
-
global
|
103 |
|
104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
|
106 |
try:
|
107 |
args.noise_scale = float(args.noise_scale)
|
|
|
73 |
|
74 |
def run_tts_paragraph(args):
|
75 |
audio_list = []
|
76 |
+
|
77 |
+
global text_to_mel
|
78 |
+
global mel_to_wav
|
79 |
+
|
80 |
+
if args.gender == 'Male':
|
81 |
+
text_to_mel = text_to_mel_list[1]
|
82 |
+
mel_to_wav = mel_to_wav_list[1]
|
83 |
+
else:
|
84 |
+
text_to_mel = text_to_mel_list[0]
|
85 |
+
mel_to_wav = mel_to_wav_list[0]
|
86 |
+
|
87 |
+
|
88 |
if args.split_sentences == 1:
|
89 |
text = normalize_text(args.text, args.lang)
|
90 |
split_sentences_list = split_sentences(text, args.lang)
|
|
|
110 |
if args.lang not in _TRANSLITERATION_NOT_AVAILABLE_IN:
|
111 |
engine = XlitEngine(args.lang) # loading translit model globally
|
112 |
|
113 |
+
global text_to_mel_list
|
114 |
+
global mel_to_wav_list
|
115 |
|
116 |
+
|
117 |
+
text_to_mel_list = []
|
118 |
+
mel_to_wav_list = []
|
119 |
+
|
120 |
+
for acoustic, vocoder in zip( args.acoustic.split(',') , args.vocoder.split(',') ):
|
121 |
+
ttm, mtw = load_models(acoustic, vocoder, args.device)
|
122 |
+
text_to_mel_list.append(ttm)
|
123 |
+
mel_to_wav_list.append(mtw)
|
124 |
|
125 |
try:
|
126 |
args.noise_scale = float(args.noise_scale)
|
ttsv/utils/inference/run_gradio.py
CHANGED
@@ -5,8 +5,9 @@ from argparse import Namespace
|
|
5 |
from .advanced_tts import load_all_models, run_tts_paragraph
|
6 |
|
7 |
|
8 |
-
def hit_tts(textbox, slider_noise_scale, slider_length_sclae, choice_transliteration, choice_number_conversion, choice_split_sentences):
|
9 |
inputs_to_gradio = {'text' : textbox,
|
|
|
10 |
'noise_scale': slider_noise_scale,
|
11 |
'length_scale': slider_length_sclae,
|
12 |
'transliteration' : 1 if choice_transliteration else 0,
|
@@ -17,6 +18,7 @@ def hit_tts(textbox, slider_noise_scale, slider_length_sclae, choice_translitera
|
|
17 |
args = Namespace(**inputs_to_gradio)
|
18 |
args.wav = None
|
19 |
args.lang = lang
|
|
|
20 |
|
21 |
if args.text:
|
22 |
sr, audio = run_tts_paragraph(args)
|
@@ -27,6 +29,7 @@ def build_gradio(args):
|
|
27 |
lang = args.lang
|
28 |
load_all_models(args)
|
29 |
textbox = gr.inputs.Textbox(placeholder="Enter Text to run", default="", label="TTS")
|
|
|
30 |
slider_noise_scale = gr.inputs.Slider(minimum=0, maximum=1.0, step=0.001, default=0.667, label='Enter Noise Scale')
|
31 |
slider_length_sclae = gr.inputs.Slider(minimum=0, maximum=2.0, step=0.1, default=1.0, label='Enter Slider Scale')
|
32 |
|
@@ -38,7 +41,7 @@ def build_gradio(args):
|
|
38 |
|
39 |
op = gr.outputs.Audio(type="numpy", label=None)
|
40 |
|
41 |
-
inputs_to_gradio = [textbox, slider_noise_scale, slider_length_sclae, choice_transliteration, choice_number_conversion, choice_split_sentences]
|
42 |
iface = gr.Interface(fn=hit_tts, inputs=inputs_to_gradio, outputs=op, theme='huggingface', title='Vakyansh Hindi TTS', article = 'Note: Transliteration models may not work well in some scenarios which can hamper the TTS quality, to evaluate the model in better sense it is advisable to provide input in the required langauge and switch off transliteration.')
|
43 |
iface.launch(share=True, enable_queue=True)
|
44 |
|
|
|
5 |
from .advanced_tts import load_all_models, run_tts_paragraph
|
6 |
|
7 |
|
8 |
+
def hit_tts(textbox, gender, slider_noise_scale, slider_length_sclae, choice_transliteration, choice_number_conversion, choice_split_sentences):
|
9 |
inputs_to_gradio = {'text' : textbox,
|
10 |
+
'gender' : gender,
|
11 |
'noise_scale': slider_noise_scale,
|
12 |
'length_scale': slider_length_sclae,
|
13 |
'transliteration' : 1 if choice_transliteration else 0,
|
|
|
18 |
args = Namespace(**inputs_to_gradio)
|
19 |
args.wav = None
|
20 |
args.lang = lang
|
21 |
+
args.gender = gender
|
22 |
|
23 |
if args.text:
|
24 |
sr, audio = run_tts_paragraph(args)
|
|
|
29 |
lang = args.lang
|
30 |
load_all_models(args)
|
31 |
textbox = gr.inputs.Textbox(placeholder="Enter Text to run", default="", label="TTS")
|
32 |
+
gender = gr.inputs.Dropdown(choices = ['Female', 'Male'], default='Female', label='Gender')
|
33 |
slider_noise_scale = gr.inputs.Slider(minimum=0, maximum=1.0, step=0.001, default=0.667, label='Enter Noise Scale')
|
34 |
slider_length_sclae = gr.inputs.Slider(minimum=0, maximum=2.0, step=0.1, default=1.0, label='Enter Slider Scale')
|
35 |
|
|
|
41 |
|
42 |
op = gr.outputs.Audio(type="numpy", label=None)
|
43 |
|
44 |
+
inputs_to_gradio = [textbox, gender, slider_noise_scale, slider_length_sclae, choice_transliteration, choice_number_conversion, choice_split_sentences]
|
45 |
iface = gr.Interface(fn=hit_tts, inputs=inputs_to_gradio, outputs=op, theme='huggingface', title='Vakyansh Hindi TTS', article = 'Note: Transliteration models may not work well in some scenarios which can hamper the TTS quality, to evaluate the model in better sense it is advisable to provide input in the required langauge and switch off transliteration.')
|
46 |
iface.launch(share=True, enable_queue=True)
|
47 |
|