Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
gradio: Interface => Blocks
Browse files- app.py +236 -71
- gr_client.py +0 -1
app.py
CHANGED
@@ -35,34 +35,34 @@ base_speaker_emb = ''
|
|
35 |
|
36 |
# order ranked by similarity to English due to the xVASynth's use of ARPAbet instead of IPA
|
37 |
languages = [
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
]
|
67 |
|
68 |
# Translated from English by DeepMind's Gemini Pro
|
@@ -218,11 +218,33 @@ def predict(
|
|
218 |
save_path = ''
|
219 |
response = {text: 'Failed'}
|
220 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
221 |
print('server.log contents:')
|
222 |
with open('resources/app/server.log', 'r') as f:
|
223 |
print(f.read())
|
224 |
|
225 |
-
return [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
226 |
|
227 |
input_textbox = gr.Textbox(
|
228 |
label="Input Text",
|
@@ -246,14 +268,62 @@ voice_radio = gr.Radio(
|
|
246 |
info="NVIDIA HIFI CC-BY-4.0 xVAPitch voice model"
|
247 |
)
|
248 |
|
249 |
-
def set_default_text(lang):
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
257 |
|
258 |
language_radio = gr.Radio(
|
259 |
languages,
|
@@ -261,40 +331,135 @@ language_radio = gr.Radio(
|
|
261 |
label="Language",
|
262 |
info="Will be more monotone and have an English accent. Tested mostly by a native Briton."
|
263 |
)
|
264 |
-
# language_radio.change(set_default_text)
|
265 |
-
deepmoji_checkbox = gr.Checkbox(label="Use DeepMoji", info="Auto adjust emotional values")
|
266 |
-
|
267 |
-
gradio_app = gr.Interface(
|
268 |
-
predict,
|
269 |
-
[
|
270 |
-
input_textbox,
|
271 |
-
voice_radio,
|
272 |
-
language_radio,
|
273 |
-
pacing_slider,
|
274 |
-
pitch_slider,
|
275 |
-
energy_slider,
|
276 |
-
anger_slider,
|
277 |
-
happy_slider,
|
278 |
-
sad_slider,
|
279 |
-
surprise_slider,
|
280 |
-
deepmoji_checkbox
|
281 |
-
],
|
282 |
-
outputs=[
|
283 |
-
gr.Audio(label="22kHz audio output", type="filepath"),
|
284 |
-
gr.Textbox(label="xVASynth Server Response")
|
285 |
-
],
|
286 |
-
title="xVASynth (WIP)",
|
287 |
-
clear_btn=gr.Button(visible=False)
|
288 |
-
# examples=[
|
289 |
-
# ["Once, I headed in much deeper. But I doubt I'll ever do that again.", 1],
|
290 |
-
# ["You love hurting me, huh?", 1.5],
|
291 |
-
# ["Ah, I see. Well, I'm afraid I can't help with that.", 1],
|
292 |
-
# ["Embrace your demise!", 1],
|
293 |
-
# ["Never come back!", 1]
|
294 |
-
# ],
|
295 |
-
# cache_examples=None
|
296 |
-
)
|
297 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
298 |
|
299 |
if __name__ == "__main__":
|
300 |
# Run the web server in a separate thread
|
@@ -303,7 +468,7 @@ if __name__ == "__main__":
|
|
303 |
web_server_thread.start()
|
304 |
|
305 |
print('running Gradio interface')
|
306 |
-
|
307 |
|
308 |
# Wait for the web server thread to finish (shouldn't be reached in normal execution)
|
309 |
web_server_thread.join()
|
|
|
35 |
|
36 |
# order ranked by similarity to English due to the xVASynth's use of ARPAbet instead of IPA
|
37 |
# (label, value) pairs used as the choices of the language `gr.Radio`; the
# value is the short language code the rest of the app compares against
# (e.g. 'en'). Entries without a flag emoji are intentionally text-only.
languages = [
    ("🇬🇧 EN", "en"),
    ("🇩🇪 DE", "de"),
    ("🇪🇸 ES", "es"),
    ("🇮🇹 IT", "it"),
    ("🇳🇱 NL", "nl"),
    ("🇵🇹 PT", "pt"),
    ("🇵🇱 PL", "pl"),
    ("🇷🇴 RO", "ro"),
    ("🇸🇪 SV", "sv"),
    ("🇩🇰 DA", "da"),
    ("🇫🇮 FI", "fi"),
    ("🇭🇺 HU", "hu"),
    ("🇬🇷 EL", "el"),
    ("🇫🇷 FR", "fr"),
    ("🇷🇺 RU", "ru"),
    ("🇺🇦 UK", "uk"),
    ("🇹🇷 TR", "tr"),
    ("🇸🇦 AR", "ar"),
    ("🇮🇳 HI", "hi"),
    ("🇯🇵 JP", "jp"),
    ("🇰🇷 KO", "ko"),
    ("🇨🇳 ZH", "zh"),
    ("🇻🇳 VI", "vi"),
    ("🇻🇦 LA", "la"),
    ("HA", "ha"),
    ("SW", "sw"),
    ("🇳🇬 YO", "yo"),
    ("WO", "wo"),
]
|
67 |
|
68 |
# Translated from English by DeepMind's Gemini Pro
|
|
|
218 |
save_path = ''
|
219 |
response = {text: 'Failed'}
|
220 |
|
221 |
+
|
222 |
+
json_data = json.loads(response)
|
223 |
+
|
224 |
+
arpabet_html = '<h6>ARPAbet & Durations</h6>'
|
225 |
+
arpabet_symbols = json_data['arpabet'].split('|')
|
226 |
+
for symb_i in range(len(json_data['durations'])):
|
227 |
+
if (arpabet_symbols[symb_i] == '<PAD>'):
|
228 |
+
continue
|
229 |
+
|
230 |
+
arpabet_html += '<strong class="arpabet" style="padding: 0 '\
|
231 |
+
+ str(round(float(json_data['durations'][symb_i]/2), 1))\
|
232 |
+
+'em">'\
|
233 |
+
+ arpabet_symbols[symb_i]\
|
234 |
+
+ '</strong> '
|
235 |
+
|
236 |
print('server.log contents:')
|
237 |
with open('resources/app/server.log', 'r') as f:
|
238 |
print(f.read())
|
239 |
|
240 |
+
return [
|
241 |
+
wav_path,
|
242 |
+
arpabet_html,
|
243 |
+
round(json_data['em_angry'][0], 2),
|
244 |
+
round(json_data['em_happy'][0], 2),
|
245 |
+
round(json_data['em_sad'][0], 2),
|
246 |
+
round(json_data['em_surprise'][0], 2)
|
247 |
+
]
|
248 |
|
249 |
input_textbox = gr.Textbox(
|
250 |
label="Input Text",
|
|
|
268 |
info="NVIDIA HIFI CC-BY-4.0 xVAPitch voice model"
|
269 |
)
|
270 |
|
271 |
+
def set_default_text(lang, deepmoji_checked):
    """Return the sample text for ``lang`` and the DeepMoji checkbox to show.

    Wired to the language radio's ``change`` event, with the input textbox
    and the DeepMoji checkbox as outputs.

    Args:
        lang: Language code selected in the language radio (e.g. ``'en'``).
        deepmoji_checked: Current value of the DeepMoji checkbox.

    Returns:
        A ``(text, checkbox)`` tuple: ``default_text[lang]`` and a
        ``gr.Checkbox`` carrying the checkbox's new label/info/value/
        interactivity.
    """
    # NOTE(review): `default_text` is defined elsewhere in the file and is
    # presumably keyed by every language code offered in the radio - confirm.

    # DeepMoji only works on English text: for any other language the
    # checkbox is unticked and locked, with an explanatory hint.
    is_english = lang == 'en'
    checkbox_enabled = gr.Checkbox(
        label="Use DeepMoji",
        info="Auto adjust emotional values" if is_english else "Works only with English!",
        value=deepmoji_checked if is_english else False,
        interactive=is_english
    )

    return default_text[lang], checkbox_enabled
|
293 |
+
|
294 |
+
def reset_em_sliders(
    deepmoji_enabled,
    anger,
    happy,
    sad,
    surprise
):
    """Zero the emotion sliders when DeepMoji is on, else keep them as-is.

    Used as the ``change`` handler of the input textbox and the voice radio,
    with the four emotion sliders as outputs.

    Args:
        deepmoji_enabled: Current value of the DeepMoji checkbox.
        anger: Current anger slider value.
        happy: Current happiness slider value.
        sad: Current sadness slider value.
        surprise: Current surprise slider value.

    Returns:
        A 4-tuple ``(anger, happy, sad, surprise)``; all zeros when
        ``deepmoji_enabled`` is truthy, otherwise the values passed in.
    """
    if deepmoji_enabled:
        return (0, 0, 0, 0)
    return (anger, happy, sad, surprise)
|
310 |
+
|
311 |
+
def toggle_deepmoji(
    checked,
    anger,
    happy,
    sad,
    surprise
):
    """Zero the emotion sliders when the DeepMoji checkbox gets ticked.

    Used as the ``change`` handler of the DeepMoji checkbox, with the four
    emotion sliders as outputs.

    Args:
        checked: New value of the DeepMoji checkbox.
        anger: Current anger slider value.
        happy: Current happiness slider value.
        sad: Current sadness slider value.
        surprise: Current surprise slider value.

    Returns:
        A 4-tuple ``(anger, happy, sad, surprise)``; all zeros when
        ``checked`` is truthy, otherwise the values passed in.
    """
    if checked:
        return (0, 0, 0, 0)
    return (anger, happy, sad, surprise)
|
327 |
|
328 |
language_radio = gr.Radio(
|
329 |
languages,
|
|
|
331 |
label="Language",
|
332 |
info="Will be more monotone and have an English accent. Tested mostly by a native Briton."
|
333 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
334 |
|
335 |
+
with gr.Blocks(css=".arpabet {display: inline-block; background-color: gray; border-radius: 5px; font-size: 120%; margin: 0.1em 0}") as demo:
|
336 |
+
gr.Markdown("# xVASynth TTS")
|
337 |
+
|
338 |
+
with gr.Row(): # Main row for inputs and language selection
|
339 |
+
with gr.Column(): # Input column
|
340 |
+
input_textbox = gr.Textbox(
|
341 |
+
label="Input Text",
|
342 |
+
value="This is what my voice sounds like.",
|
343 |
+
info="Also accepts ARPAbet symbols placed within {} brackets.",
|
344 |
+
lines=1,
|
345 |
+
max_lines=5,
|
346 |
+
autofocus=True
|
347 |
+
)
|
348 |
+
language_radio = gr.Radio(
|
349 |
+
languages,
|
350 |
+
value="en",
|
351 |
+
label="Language",
|
352 |
+
info="Will be more monotone and have an English accent. Tested mostly by a native Briton."
|
353 |
+
)
|
354 |
+
pacing_slider = gr.Slider(0.5, 2.0, value=1.0, step=0.1, label="Duration")
|
355 |
+
with gr.Column(): # Control column
|
356 |
+
voice_radio = gr.Radio(
|
357 |
+
voice_models,
|
358 |
+
value="ccby_nvidia_hifi_6671_M",
|
359 |
+
label="Voice",
|
360 |
+
info="NVIDIA HIFI CC-BY-4.0 xVAPitch voice model"
|
361 |
+
)
|
362 |
+
pitch_slider = gr.Slider(0, 1.0, value=0.5, step=0.05, label="Pitch", visible=False)
|
363 |
+
energy_slider = gr.Slider(0.1, 1.0, value=1.0, step=0.05, label="Energy", visible=False)
|
364 |
+
with gr.Row(): # Row holding the emotion sliders
|
365 |
+
with gr.Column(): # Input column
|
366 |
+
anger_slider = gr.Slider(0, 1.0, value=0, step=0.05, label="😠 Anger", info="Tread lightly beyond 0.9")
|
367 |
+
sad_slider = gr.Slider(0, 1.0, value=0, step=0.05, label="😭 Sadness", info="Duration increased when beyond 0.2")
|
368 |
+
with gr.Column(): # Input column
|
369 |
+
happy_slider = gr.Slider(0, 1.0, value=0, step=0.05, label="😃 Happiness", info="Tread lightly beyond 0.7")
|
370 |
+
surprise_slider = gr.Slider(0, 1.0, value=0, step=0.05, label="😮 Surprise", info="Can oversaturate Happiness")
|
371 |
+
deepmoji_checkbox = gr.Checkbox(label="Use DeepMoji", info="Auto adjust emotional values", value=True)
|
372 |
+
|
373 |
+
# Event handling using click
|
374 |
+
btn = gr.Button("Generate")
|
375 |
+
|
376 |
+
with gr.Row(): # Row holding the outputs (audio and ARPAbet)
|
377 |
+
with gr.Column(): # Input column
|
378 |
+
output_wav = gr.Audio(label="22kHz audio output", type="filepath", editable=False)
|
379 |
+
with gr.Column(): # Input column
|
380 |
+
output_arpabet = gr.HTML(label="ARPAbet")
|
381 |
+
|
382 |
+
btn.click(
|
383 |
+
fn=predict,
|
384 |
+
inputs=[
|
385 |
+
input_textbox,
|
386 |
+
voice_radio,
|
387 |
+
language_radio,
|
388 |
+
pacing_slider,
|
389 |
+
pitch_slider,
|
390 |
+
energy_slider,
|
391 |
+
anger_slider,
|
392 |
+
happy_slider,
|
393 |
+
sad_slider,
|
394 |
+
surprise_slider,
|
395 |
+
deepmoji_checkbox
|
396 |
+
],
|
397 |
+
outputs=[
|
398 |
+
output_wav,
|
399 |
+
output_arpabet,
|
400 |
+
anger_slider,
|
401 |
+
happy_slider,
|
402 |
+
sad_slider,
|
403 |
+
surprise_slider
|
404 |
+
]
|
405 |
+
)
|
406 |
+
|
407 |
+
language_radio.change(
|
408 |
+
set_default_text,
|
409 |
+
inputs=[language_radio, deepmoji_checkbox],
|
410 |
+
outputs=[input_textbox, deepmoji_checkbox]
|
411 |
+
)
|
412 |
+
|
413 |
+
deepmoji_checkbox.change(
|
414 |
+
toggle_deepmoji,
|
415 |
+
inputs=[
|
416 |
+
deepmoji_checkbox,
|
417 |
+
anger_slider,
|
418 |
+
happy_slider,
|
419 |
+
sad_slider,
|
420 |
+
surprise_slider
|
421 |
+
],
|
422 |
+
outputs=[
|
423 |
+
anger_slider,
|
424 |
+
happy_slider,
|
425 |
+
sad_slider,
|
426 |
+
surprise_slider
|
427 |
+
]
|
428 |
+
)
|
429 |
+
|
430 |
+
input_textbox.change(
|
431 |
+
reset_em_sliders,
|
432 |
+
inputs=[
|
433 |
+
deepmoji_checkbox,
|
434 |
+
anger_slider,
|
435 |
+
happy_slider,
|
436 |
+
sad_slider,
|
437 |
+
surprise_slider
|
438 |
+
],
|
439 |
+
outputs=[
|
440 |
+
anger_slider,
|
441 |
+
happy_slider,
|
442 |
+
sad_slider,
|
443 |
+
surprise_slider
|
444 |
+
]
|
445 |
+
)
|
446 |
+
|
447 |
+
voice_radio.change(
|
448 |
+
reset_em_sliders,
|
449 |
+
inputs=[
|
450 |
+
deepmoji_checkbox,
|
451 |
+
anger_slider,
|
452 |
+
happy_slider,
|
453 |
+
sad_slider,
|
454 |
+
surprise_slider
|
455 |
+
],
|
456 |
+
outputs=[
|
457 |
+
anger_slider,
|
458 |
+
happy_slider,
|
459 |
+
sad_slider,
|
460 |
+
surprise_slider
|
461 |
+
]
|
462 |
+
)
|
463 |
|
464 |
if __name__ == "__main__":
|
465 |
# Run the web server in a separate thread
|
|
|
468 |
web_server_thread.start()
|
469 |
|
470 |
print('running Gradio interface')
|
471 |
+
demo.launch()
|
472 |
|
473 |
# Wait for the web server thread to finish (shouldn't be reached in normal execution)
|
474 |
web_server_thread.join()
|
gr_client.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
import os
|
2 |
import sys
|
3 |
-
import copy
|
4 |
import time
|
5 |
import requests
|
6 |
import json
|
|
|
1 |
import os
|
2 |
import sys
|
|
|
3 |
import time
|
4 |
import requests
|
5 |
import json
|