Pendrokar committed
Commit: b975979
Parent: 317535b

lojban v2 voice model

Files changed (3):
  1. README.md +3 -1
  2. app.py +9 -3
  3. gr_client.py +36 -5
README.md CHANGED
@@ -1,6 +1,6 @@
 ---
 title: xVASynth TTS
-emoji: 🧝‍♀️🧛‍♂️🧚‍♂
+emoji: 🧝‍♀️🧛‍♂️🧚‍♀️
 colorFrom: gray
 colorTo: gray
 sdk: gradio
@@ -9,6 +9,7 @@ sdk_version: 4.20.0
 models:
 - Pendrokar/xvapitch_nvidia
 - Pendrokar/TorchMoji
+- Pendrokar/xvasynth_lojban
 app_file: app.py
 app_port: 7860
 tags:
@@ -20,6 +21,7 @@ pinned: false
 preload_from_hub:
 - Pendrokar/xvapitch_nvidia
 - Pendrokar/TorchMoji
+- Pendrokar/xvasynth_lojban
 license: gpl-3.0
 thumbnail: >-
   https://raw.githubusercontent.com/DanRuta/xVA-Synth/master/assets/x-icon.png
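The two new Pendrokar/xvasynth_lojban entries list the voice model under models: and have the Space download it at build time via preload_from_hub:. For orientation only, a minimal sketch (not part of this commit) of checking at startup that every preloaded repo is actually present in the local Hugging Face cache, using huggingface_hub's cache scanner:

# Sketch: verify the repos named under preload_from_hub are already cached.
# Repo names mirror the README; the check itself is illustrative.
from huggingface_hub import scan_cache_dir

expected = {'Pendrokar/xvapitch_nvidia', 'Pendrokar/TorchMoji', 'Pendrokar/xvasynth_lojban'}
cached = {repo.repo_id for repo in scan_cache_dir().repos}
missing = expected - cached
if missing:
    print(f'Not preloaded yet: {missing}')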
app.py CHANGED
@@ -15,16 +15,22 @@ model_repo = HfApi()
 commits = model_repo.list_repo_commits(repo_id=hf_model_name)
 latest_commit_sha = commits[0].commit_id
 hf_cache_models_path = f'/home/user/.cache/huggingface/hub/models--Pendrokar--xvapitch_nvidia/snapshots/{latest_commit_sha}/'
+hf_cache_lojban_models_path = f'/home/user/.cache/huggingface/hub/models--Pendrokar--xvasynth_lojban/snapshots/{latest_commit_sha}/'
 models_path = hf_cache_models_path
 
 current_voice_model = None
 base_speaker_emb = ''
 
 def load_model(voice_model_name):
-    model_path = models_path + voice_model_name
+    if voice_model_name == 'x_selpahi':
+        # Lojban
+        model_path = hf_cache_lojban_models_path + voice_model_name
+        model_type = 'FastPitch1.1'
+    else:
+        model_path = models_path + voice_model_name
+        model_type = 'xVAPitch'
 
-    model_type = 'xVAPitch'
-    language = 'en'
+    language = 'en'  # seems to have no effect if generated text is from a different language
 
     data = {
         'outputs': None,
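The new hf_cache_lojban_models_path reuses the latest_commit_sha fetched for Pendrokar/xvapitch_nvidia when building the Lojban snapshot path; whether both repos resolve to the same snapshot hash is not shown in this diff. For reference, a minimal sketch (an alternative, not what the commit does) of resolving a repo's snapshot folder without hard-coding the cache layout:

# Sketch: snapshot_download returns the local snapshot directory for the repo,
# reusing already-cached files when the Space preloaded them at build time.
from huggingface_hub import snapshot_download

hf_cache_lojban_models_path = snapshot_download(repo_id='Pendrokar/xvasynth_lojban') + '/'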
gr_client.py CHANGED
@@ -9,13 +9,14 @@ voice_models = [
 ]
 voice_models_more = [
     ("🧔 #6670", "ccby_nvidia_hifi_6670_M"),
-    ("Male #9017", "ccby_nvidia_hifi_9017_M"),
-    ("Male #6097", "ccby_nvidia_hifi_6097_M"),
+    ("👨‍🦲 #9017", "ccby_nvidia_hifi_9017_M"),
+    ("🧑 #6097", "ccby_nvidia_hifi_6097_M"),
     ("👩‍🦱 #12787", "ccby_nvidia_hifi_12787_F"),
     ("👵 #11614", "ccby_nv_hifi_11614_F"),
-    ("Female #8051", "ccby_nvidia_hifi_8051_F"),
+    ("👩‍🦰 #8051", "ccby_nvidia_hifi_8051_F"),
     ("👩‍🦳 #11697", "ccby_nvidia_hifi_11697_F"),
-    ("Female #9136", "ccby_nvidia_hifi_9136_F"),
+    ("👩‍🦲 #9136", "ccby_nvidia_hifi_9136_F"),
+    ("♟ Lojban", "x_selpahi"),  # v2 model for Lojban, pre-multilingual capabilities of xVASynth
 ]
 
 # order ranked by similarity to English due to the xVASynth's use of ARPAbet instead of IPA
@@ -52,6 +53,11 @@ languages_more = [
     ("Wolof", "wo"),
 ]
 
+lojban_lang = [
+    # There is no ISO 639-1 for Lojban, but jb is valid
+    ('♟ Lojban', 'jb')
+]
+
 # Translated from English by DeepMind's Gemini Pro
 default_text = {
     "ar": "هذا هو صوتي.",
@@ -66,6 +72,7 @@ default_text = {
     "hi": "यह मेरी आवाज़ कैसी लगती है।",
     "hu": "Így hangzik a hangom.",
     "it": "Così suona la mia voce.",
+    "jb": ".i ",
     "jp": "これが私の声です。",
     "ko": "여기 제 목소리가 어떤지 들어보세요.",
     "la": "Haec est vox mea sonans.",
@@ -285,6 +292,19 @@ language_radio_init = {
     'info': "Will be more monotone and have an English accent."
 }
 
+def set_lojban_language(voice, lang):
+    if voice != 'x_selpahi':
+        return lang
+
+    radio_init = {**language_radio_init}
+    radio_init['choices'] = [
+        *lojban_lang,
+        *languages,
+        *languages_more,
+    ]
+    radio_init['value'] = lojban_lang[0][1]
+    return gr.Radio(**radio_init)
+
 _DESCRIPTION = '''
 <div>
 <a style="display:inline-block;" href="https://github.com/DanRuta/xVA-Synth"><img src='https://img.shields.io/github/stars/DanRuta/xVA-Synth?style=social'/></a>
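set_lojban_language leans on the Gradio 4 pattern of returning a freshly constructed component from an event handler to overwrite the bound component's properties. A rough usage sketch under that assumption, with voice names taken from the lists above:

# Non-Lojban voice: the current language value is passed through unchanged.
set_lojban_language('ccby_nvidia_hifi_6670_M', 'en')  # -> 'en'

# Lojban voice: a new gr.Radio is returned with the Lojban entry prepended
# to the choices and selected as the value.
set_lojban_language('x_selpahi', 'en')  # -> gr.Radio(..., value='jb')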
 
@@ -475,10 +495,21 @@ class BlocksDemo:
             queue=False,
         )
 
+        # Replace output with voice audio sample
         voice_radio.change(
             self.set_default_audio,
             inputs=voice_radio,
-            outputs=output_wav
+            outputs=output_wav,
+            queue=True,
+        )
+
+        # Switched to Lojban voice
+        voice_radio.change(
+            set_lojban_language,
+            inputs=[voice_radio, language_radio],
+            outputs=[language_radio],
+            trigger_mode='once',
+            queue=True,
         )
 
         return demo
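voice_radio now has two independent .change listeners: the first refreshes the audio sample, the second swaps the language radio over to Lojban when the x_selpahi voice is picked, and Gradio fires both on every change. If execution order ever matters, the same wiring could be chained with .then() instead; a sketch of that alternative (not what this commit does):

# Sketch: run the language swap only after the audio-sample update has finished.
voice_radio.change(
    self.set_default_audio,
    inputs=voice_radio,
    outputs=output_wav,
    queue=True,
).then(
    set_lojban_language,
    inputs=[voice_radio, language_radio],
    outputs=language_radio,
)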