Pendrokar committed on
Commit 11acbd9 • 1 Parent(s): d4c907a

component inits
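
This commit replaces inline gr.* constructor calls with per-component keyword-argument dicts ("inits") that are unpacked when the layout is built and reused, with overrides, inside event handlers. Below is a minimal sketch of that pattern, assuming Gradio 4.x (where returning a newly constructed component from a handler updates the rendered one); the names are illustrative, not taken from gr_client.py.

# Sketch of the "component inits" pattern: defaults in plain dicts, unpacked at
# layout time and copied/overridden inside handlers. Assumes Gradio 4.x.
import gradio as gr

textbox_init = {
    "label": "Input Text",
    "value": "This is what my voice sounds like.",
    "lines": 1,
}
checkbox_init = {
    "label": "Use DeepMoji",
    "info": "Auto adjust emotional values",
    "value": True,
    "interactive": True,
}

def on_language_change(lang):
    # Handlers override only the keys that differ from the defaults.
    init = {**checkbox_init}
    if lang != "en":
        init.update(info="Works only with English!", value=False, interactive=False)
    return gr.Checkbox(**init)

with gr.Blocks() as demo:
    textbox = gr.Textbox(**textbox_init)
    language = gr.Radio([("EN", "en"), ("DE", "de")], value="en", label="Language")
    checkbox = gr.Checkbox(**checkbox_init)
    language.change(fn=on_language_change, inputs=language, outputs=checkbox)

if __name__ == "__main__":
    demo.launch()

Keeping the defaults in one dict gives the layout code and every handler that tweaks a single property a shared source of truth, which is what the diff below does for each slider, radio, textbox, and checkbox.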

Files changed (1)
1. gr_client.py +135 -98
gr_client.py CHANGED
@@ -8,27 +8,33 @@ import gradio as gr
 from gradio_client import Client
 
 voice_models = [
-    ("Male #6671", "ccby_nvidia_hifi_6671_M"),
-    ("Male #6670", "ccby_nvidia_hifi_6670_M"),
+    ("👨‍🦳 #6671", "ccby_nvidia_hifi_6671_M"),
+    ("👱‍♀️ 🇬🇧 #92", "ccby_nvidia_hifi_92_F"),
+]
+voice_models_more = [
+    ("🧔 #6670", "ccby_nvidia_hifi_6670_M"),
     ("Male #9017", "ccby_nvidia_hifi_9017_M"),
     ("Male #6097", "ccby_nvidia_hifi_6097_M"),
-    ("Female #92", "ccby_nvidia_hifi_92_F"),
-    ("Female #11697", "ccby_nvidia_hifi_11697_F"),
-    ("Female #12787", "ccby_nvidia_hifi_12787_F"),
-    ("Female #11614", "ccby_nv_hifi_11614_F"),
+    ("👩‍🦱 #12787", "ccby_nvidia_hifi_12787_F"),
+    ("👵 #11614", "ccby_nv_hifi_11614_F"),
     ("Female #8051", "ccby_nvidia_hifi_8051_F"),
+    ("👩‍🦳 #11697", "ccby_nvidia_hifi_11697_F"),
     ("Female #9136", "ccby_nvidia_hifi_9136_F"),
 ]
 current_voice_model = None
 
 # order ranked by similarity to English due to the xVASynth's use of ARPAbet instead of IPA
 languages = [
-    ("🇬🇧 EN", "en"),
+    ("🇺🇸 EN", "en"),
     ("🇩🇪 DE", "de"),
     ("🇪🇸 ES", "es"),
-    ("🇮🇹 IT", "it"),
+    ("🇮🇳 HI", "hi"),
+    ("🇨🇳 ZH", "zh"),
+]
+languages_more = [
     ("🇳🇱 NL", "nl"),
-    ("🇵🇹 PT", "pt"),
+    ("🇧🇷 PT", "pt"),
+    ("🇮🇹 IT", "it"),
     ("🇵🇱 PL", "pl"),
     ("🇷🇴 RO", "ro"),
     ("🇸🇪 SV", "sv"),
@@ -38,19 +44,17 @@ languages = [
     ("🇬🇷 EL", "el"),
     ("🇫🇷 FR", "fr"),
     ("🇷🇺 RU", "ru"),
-    ("🇺🇦 UK", "uk"),
+    ("🇺🇦 UA", "uk"),
     ("🇹🇷 TR", "tr"),
     ("🇸🇦 AR", "ar"),
-    ("🇮🇳 HI", "hi"),
     ("🇯🇵 JP", "jp"),
     ("🇰🇷 KO", "ko"),
-    ("🇨🇳 ZH", "zh"),
     ("🇻🇳 VI", "vi"),
     ("🇻🇦 LA", "la"),
-    ("HA", "ha"),
-    ("SW", "sw"),
     ("🇳🇬 YO", "yo"),
-    ("WO", "wo"),
+    ("Swahili", "sw"),
+    ("Hausa", "ha"),
+    ("Wolof", "wo"),
 ]
 
 # Translated from English by DeepMind's Gemini Pro
@@ -150,47 +154,98 @@ def predict(
         round(json_data['em_surprise'][0], 2)
     ]
 
-input_textbox = gr.Textbox(
-    label="Input Text",
-    value="This is what my voice sounds like.",
-    info="Also accepts ARPAbet symbols placed within {} brackets.",
-    lines=1,
-    max_lines=5,
-    autofocus=True
-)
-pacing_slider = gr.Slider(0.5, 2.0, value=1.0, step=0.1, label="Duration")
-pitch_slider = gr.Slider(0, 1.0, value=0.5, step=0.05, label="Pitch", visible=False)
-energy_slider = gr.Slider(0.1, 1.0, value=1.0, step=0.05, label="Energy", visible=False)
-anger_slider = gr.Slider(0, 1.0, value=0, step=0.05, label="😠 Anger", info="Tread lightly beyond 0.9")
-happy_slider = gr.Slider(0, 1.0, value=0, step=0.05, label="😃 Happiness", info="Tread lightly beyond 0.7")
-sad_slider = gr.Slider(0, 1.0, value=0, step=0.05, label="😭 Sadness", info="Duration increased when beyond 0.2")
-surprise_slider = gr.Slider(0, 1.0, value=0, step=0.05, label="😮 Surprise", info="Does not play well with Happiness with either being beyond 0.3")
-voice_radio = gr.Radio(
-    voice_models,
-    value="ccby_nvidia_hifi_6671_M",
-    label="Voice",
-    info="NVIDIA HIFI CC-BY-4.0 xVAPitch voice model"
-)
+# Component defaults
+input_textbox_init = {
+    'label': "Input Text",
+    'value': "This is what my voice sounds like.",
+    'info': "Also accepts ARPAbet symbols placed within {} brackets.",
+    'lines': 1,
+    'max_lines': 5,
+    'autofocus': True
+}
+pacing_slider_init = {
+    'value': 1.0,
+    'minimum': 0.5,
+    'maximum': 2.0,
+    'step': 0.1,
+    'label': "Duration"
+}
+pitch_slider_init = {
+    'minimum': 0,
+    'maximum': 1.0,
+    'value': 0.5,
+    'step': 0.05,
+    'label': "Pitch",
+    'visible': False
+}
+energy_slider_init = {
+    'minimum': 0.1,
+    'maximum': 1.0,
+    'value': 1.0,
+    'step': 0.05,
+    'label': "Energy",
+    'visible': False
+}
+anger_slider_init = {
+    'minimum': 0,
+    'maximum': 1.0,
+    'value': 0,
+    'step': 0.05,
+    'label': "😠 Anger",
+    'info': "Tread lightly beyond 0.9"
+}
+happy_slider_init = {
+    'minimum': 0,
+    'maximum': 1.0,
+    'value': 0,
+    'step': 0.05,
+    'label': "😃 Happiness",
+    'info': "Tread lightly beyond 0.7"
+}
+sad_slider_init = {
+    'minimum': 0,
+    'maximum': 1.0,
+    'value': 0,
+    'step': 0.05,
+    'label': "😭 Sadness",
+    'info': "Duration increased when beyond 0.2"
+}
+surprise_slider_init = {
+    'minimum': 0,
+    'maximum': 1.0,
+    'value': 0,
+    'step': 0.05,
+    'label': "😮 Surprise",
+    'info': "Does not play well with Happiness with either being beyond 0.3"
+}
+voice_radio_init = {
+    'choices': [*voice_models, (f'+{len(voice_models_more)}', 'more')],
+    'value': "ccby_nvidia_hifi_6671_M",
+    'label': "Voice",
+    'info': "NVIDIA HIFI CC-BY-4.0 xVAPitch voice model"
+}
+deepmoji_checkbox_init = {
+    'label': "Use DeepMoji",
+    'info': "Auto adjust emotional values",
+    'value': True,
+    'interactive': True
+}
 
 def set_default_text(lang, deepmoji_checked):
     # DeepMoji only works on English Text
+    checkbox_init = {**deepmoji_checkbox_init}
     if lang == 'en':
-        checkbox_enabled = gr.Checkbox(
-            label="Use DeepMoji",
-            info="Auto adjust emotional values",
-            value=deepmoji_checked,
-            interactive=True
-        )
+        checkbox_init['value'] = deepmoji_checked
+        checkbox_init['interactive'] = True
     else:
-        checkbox_enabled = gr.Checkbox(
-            label="Use DeepMoji",
-            info="Works only with English!",
-            value=False,
-            interactive=False
-        )
+        checkbox_init['info'] = "Works only with English!"
+        checkbox_init['value'] = False
+        checkbox_init['interactive'] = False
 
+    checkbox_enabled = gr.Checkbox(**checkbox_init)
     return default_text[lang], checkbox_enabled # Return the modified textbox (important for Blocks)
 
+# examples component
 en_examples = [
     "This is what my voice sounds like.",
     "If there is anything else you need, feel free to ask.",
@@ -204,22 +259,26 @@ en_examples = [
     # ARPAbet example
     "This { IH1 Z } { W AH1 T } { M AY1 } { V OY1 S } { S AW1 N D Z } like.",
 ]
+en_examples_dropdown_init = {
+    'choices': en_examples,
+    'value': en_examples[0],
+    'label': "Example dropdown",
+    'show_label': False,
+    'info': "English Examples",
+    'visible': True
+}
 
 def set_example_as_input(example_text):
     return example_text
 
 def toggle_example_dropdown(lang):
+    dropdown_init = {**en_examples_dropdown_init}
     if lang == 'en':
-        return gr.Dropdown(
-            en_examples,
-            value=en_examples[0],
-            label="Example dropdown",
-            show_label=False,
-            info="English Examples",
-            visible=True
-        )
+        dropdown_init['visible'] = True
     else:
-        return gr.Dropdown(visible=False)
+        dropdown_init['visible'] = False
+
+    return gr.Dropdown(**dropdown_init)
 
 def reset_em_sliders(
     deepmoji_enabled,
@@ -255,12 +314,13 @@ def toggle_deepmoji(
         surprise
     )
 
-language_radio = gr.Radio(
-    languages,
-    value="en",
-    label="Language",
-    info="Will be more monotone and have an English accent. Tested mostly by a native Briton."
-)
+# languages component
+language_radio_init = {
+    'choices': [*languages, *[(f'+{len(languages_more)}', 'more')]],
+    'value': "en",
+    'label': "Language",
+    'info': "Will be more monotone and have an English accent. Tested mostly by a native Briton."
+}
 
 _DESCRIPTION = '''
 <div>
@@ -277,48 +337,25 @@ with gr.Blocks(css=".arpabet {background-color: gray; border-radius: 5px; font-s
 
     with gr.Row(): # Main row for inputs and language selection
         with gr.Column(): # Input column
-            input_textbox = gr.Textbox(
-                label="Input Text",
-                value="This is what my voice sounds like.",
-                info="Also accepts ARPAbet symbols placed within {} brackets.",
-                lines=1,
-                max_lines=5,
-                autofocus=True
-            )
-            language_radio = gr.Radio(
-                languages,
-                value="en",
-                label="Language",
-                info="Will be more monotone and have an English accent. Tested mostly by a native Briton."
-            )
+            input_textbox = gr.Textbox(**input_textbox_init)
+            language_radio = gr.Radio(**language_radio_init)
 
     with gr.Row():
         with gr.Column():
-            en_examples_dropdown = gr.Dropdown(
-                en_examples,
-                value=en_examples[0],
-                label="Example dropdown",
-                show_label=False,
-                info="English Examples"
-            )
+            en_examples_dropdown = gr.Dropdown(**en_examples_dropdown_init)
         with gr.Column():
-            pacing_slider = gr.Slider(0.5, 2.0, value=1.0, step=0.1, label="Duration")
+            pacing_slider = gr.Slider(**pacing_slider_init)
         with gr.Column(): # Control column
-            voice_radio = gr.Radio(
-                voice_models,
-                value="ccby_nvidia_hifi_6671_M",
-                label="Voice",
-                info="NVIDIA HIFI CC-BY-4.0 xVAPitch voice model"
-            )
-            pitch_slider = gr.Slider(0, 1.0, value=0.5, step=0.05, label="Pitch", visible=False)
-            energy_slider = gr.Slider(0.1, 1.0, value=1.0, step=0.05, label="Energy", visible=False)
+            voice_radio = gr.Radio(**voice_radio_init)
+            pitch_slider = gr.Slider(**pitch_slider_init)
+            energy_slider = gr.Slider(**energy_slider_init)
     with gr.Row(): # Main row for inputs and language selection
         with gr.Column(): # Input column
-            anger_slider = gr.Slider(0, 1.0, value=0, step=0.05, label="😠 Anger", info="Tread lightly beyond 0.9")
-            sad_slider = gr.Slider(0, 1.0, value=0, step=0.05, label="😭 Sadness", info="Duration increased when beyond 0.2")
+            anger_slider = gr.Slider(**anger_slider_init)
+            sad_slider = gr.Slider(**sad_slider_init)
         with gr.Column(): # Input column
-            happy_slider = gr.Slider(0, 1.0, value=0, step=0.05, label="😃 Happiness", info="Tread lightly beyond 0.7")
-            surprise_slider = gr.Slider(0, 1.0, value=0, step=0.05, label="😮 Surprise", info="Can oversaturate Happiness")
+            happy_slider = gr.Slider(**happy_slider_init)
+            surprise_slider = gr.Slider(**surprise_slider_init)
             deepmoji_checkbox = gr.Checkbox(label="Use DeepMoji", info="Auto adjust emotional values", value=True)
 
     # Event handling using click
@@ -432,6 +469,6 @@ with gr.Blocks(css=".arpabet {background-color: gray; border-radius: 5px; font-s
 if __name__ == "__main__":
     print('running Gradio interface')
     # gradio_app.launch()
-    client = Client("Pendrokar/xVASynth")
+    # client = Client("Pendrokar/xVASynth")
 
     demo.launch()
 
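The new voice_radio_init and language_radio_init also append a sentinel choice, (f'+{len(voice_models_more)}', 'more'), after the trimmed default lists. The handler that reacts to that choice is outside this diff; the sketch below is only one plausible way to wire it, with hypothetical names and Gradio 4.x update semantics assumed.

# Hedged sketch (not from this commit): handling the "+N"/"more" sentinel choice.
import gradio as gr

# Hypothetical stand-ins for the lists defined in gr_client.py
voice_models = [("Male #6671", "ccby_nvidia_hifi_6671_M"), ("Female #92", "ccby_nvidia_hifi_92_F")]
voice_models_more = [("Male #6670", "ccby_nvidia_hifi_6670_M")]

voice_radio_init = {
    'choices': [*voice_models, (f'+{len(voice_models_more)}', 'more')],
    'value': "ccby_nvidia_hifi_6671_M",
    'label': "Voice",
}

def expand_voices(choice):
    # A normal selection: keep the radio as it is.
    if choice != 'more':
        return gr.Radio(value=choice)
    # The "+N" sentinel was picked: rebuild the radio with every model listed.
    return gr.Radio(
        choices=[*voice_models, *voice_models_more],
        value=voice_radio_init['value'],
    )

with gr.Blocks() as demo:
    voice_radio = gr.Radio(**voice_radio_init)
    voice_radio.input(fn=expand_voices, inputs=voice_radio, outputs=voice_radio)

if __name__ == "__main__":
    demo.launch()

Swapping only the 'choices' key at runtime keeps the compact default list while still exposing the full voice_models_more set on demand, and the same idea applies to languages_more behind language_radio_init.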