alan committed on
Commit
b78425c
·
1 Parent(s): f311f6c
Files changed (1) hide show
  1. app.py +71 -25
app.py CHANGED
@@ -48,9 +48,12 @@ AVAILABLE_MODELS = {
48
  # # 'Vokan TTS': 'vokan',
49
  # 'VoiceCraft 2.0': 'voicecraft',
50
  # 'Parler TTS': 'parler'
51
- 'MOE': 'moe',
52
  'BARK': 'bark',
53
- 'KOTOBA-SPEECH': 'kotoba-speech',
 
 
 
54
  'BLANE-TTS': 'blane-tts'
55
  }
56
 
@@ -117,14 +120,17 @@ def get_db():
117
 
118
  def get_tts_file(text: str, model: str):
119
  url = {
120
- "kotoba-speech": "https://kotoba-tech-kotoba-speech.hf.space/call/tts",
 
 
 
121
  "blane-tts": "https://blane187-blane-tts.hf.space/call/get_audio_file"
122
  }
123
  headers = {
124
  "Content-Type": "application/json"
125
  }
126
  data = {
127
- "kotoba-speech": {
128
  "data": [
129
  text,
130
  5,
@@ -135,6 +141,39 @@ def get_tts_file(text: str, model: str):
135
  {"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"}
136
  ]
137
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  "blane-tts": {
139
  "data": [
140
  text,
@@ -346,8 +385,11 @@ model_names = {
346
  # 'speecht5': 'SpeechT5',
347
  # 'metavoice': 'MetaVoice-1B',
348
  'bark': 'BARK',
349
- 'moe': 'MOE',
350
- 'kotoba-speech': 'KOTOBA-SPEECH',
 
 
 
351
  'blane-tts': 'BLANE-TTS'
352
  # 'styletts2': 'StyleTTS 2',
353
  }
@@ -398,12 +440,15 @@ model_links = {
398
  # 'speecht5': 'https://github.com/microsoft/SpeechT5',
399
  # 'metavoice': 'https://github.com/metavoiceio/metavoice-src',
400
  'bark': 'https://suno-bark.hf.space/',
401
- 'moe': 'skytnt/moe-tts',
402
- 'kotoba-speech': 'https://kotoba-tech-kotoba-speech.hf.space/',
 
 
 
403
  'blane-tts': 'https://blane187-blane-tts.hf.space/'
404
  }
405
  model_kwargs = {
406
- 'moe': {
407
  'api_name': '/tts_fn_7'
408
  },
409
  'bark': {
@@ -672,17 +717,19 @@ def synthandreturn(text):
672
  # if model in AVAILABLE_MODELS:
673
  if model in model_names:
674
  model_args = {
675
- 'moe': (
676
  text,
677
  ),
678
  'bark': (
679
  text,
680
- 'Speaker 0 (ja)',
681
  ),
682
  }
683
  # result = router.predict(text, AVAILABLE_MODELS[model].lower(), api_name="/synthesize")
684
  if model in model_kwargs:
685
  router = Client(model_links[model])
 
 
686
  result = router.predict(*model_args[model], **model_kwargs[model])
687
  else:
688
  result = get_tts_file(text, model)
@@ -710,12 +757,11 @@ def synthandreturn(text):
710
  # api_name="/process"
711
  # )[0]
712
  raise NotImplementedError
713
- router = Client("skytnt/moe-tts")
714
- result = router.predict(text_150_words_limitation=text, api_name="/tts_fn_7")[1]
715
  except:
716
  raise gr.Error('Unable to call API, please try again :)')
717
  print('Done with', model)
718
- if model in {"moe"}:
719
  result = result[1]
720
  # try:
721
  # doresample(result)
@@ -912,21 +958,21 @@ with gr.Blocks() as vote:
912
 
913
  with gr.Blocks() as about:
914
  gr.Markdown(ABOUT)
915
- # with gr.Blocks() as admin:
916
- # rdb = gr.Button("Reload Audio Dataset")
917
- # # rdb.click(reload_audio_dataset, outputs=rdb)
918
- # with gr.Group():
919
- # dbtext = gr.Textbox(label="Type \"delete db\" to confirm", placeholder="delete db")
920
- # ddb = gr.Button("Delete DB")
921
- # ddb.click(del_db, inputs=dbtext, outputs=ddb)
922
  with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none}", title="JA TTS Arena") as demo:
923
  gr.Markdown(DESCR)
924
- # gr.TabbedInterface([vote, leaderboard, about, admin], ['Vote', 'Leaderboard', 'About', 'Admin (ONLY IN BETA)'])
925
- gr.TabbedInterface([vote, leaderboard, about], ['🗳️ Vote', '🏆 Leaderboard', '📄 About'])
926
  if CITATION_TEXT:
927
  with gr.Row():
928
  with gr.Accordion("Citation", open=False):
929
  gr.Markdown(f"If you use this data in your publication, please cite us!\n\nCopy the BibTeX citation to cite this source:\n\n```bibtext\n{CITATION_TEXT}\n```\n\nPlease remember that all generated audio clips should be assumed unsuitable for redistribution or commercial use.")
930
 
931
-
932
- demo.queue(api_open=False, default_concurrency_limit=40).launch(show_api=False)
 
48
  # # 'Vokan TTS': 'vokan',
49
  # 'VoiceCraft 2.0': 'voicecraft',
50
  # 'Parler TTS': 'parler'
51
+ 'MOE-VITS': 'moe-vits',
52
  'BARK': 'bark',
53
+ 'KOTOBA-SPEECH-AVA': 'kotoba-speech-ava',
54
+ 'KOTOBA-SPEECH-BRIA': 'kotoba-speech-bria',
55
+ 'KOTOBA-SPEECH-ALEX': 'kotoba-speech-alex',
56
+ 'KOTOBA-SPEECH-JACOB': 'kotoba-speech-jacob',
57
  'BLANE-TTS': 'blane-tts'
58
  }
59
 
 
120
 
121
  def get_tts_file(text: str, model: str):
122
  url = {
123
+ "kotoba-speech-ava": "https://kotoba-tech-kotoba-speech.hf.space/call/tts",
124
+ "kotoba-speech-bria": "https://kotoba-tech-kotoba-speech.hf.space/call/tts",
125
+ "kotoba-speech-alex": "https://kotoba-tech-kotoba-speech.hf.space/call/tts",
126
+ "kotoba-speech-jacob": "https://kotoba-tech-kotoba-speech.hf.space/call/tts",
127
  "blane-tts": "https://blane187-blane-tts.hf.space/call/get_audio_file"
128
  }
129
  headers = {
130
  "Content-Type": "application/json"
131
  }
132
  data = {
133
+ "kotoba-speech-ava": {
134
  "data": [
135
  text,
136
  5,
 
141
  {"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"}
142
  ]
143
  },
144
+ "kotoba-speech-bria": {
145
+ "data": [
146
+ text,
147
+ 5,
148
+ 5,
149
+ "Preset voices",
150
+ "Bria",
151
+ {"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"},
152
+ {"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"}
153
+ ]
154
+ },
155
+ "kotoba-speech-alex": {
156
+ "data": [
157
+ text,
158
+ 5,
159
+ 5,
160
+ "Preset voices",
161
+ "Alex",
162
+ {"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"},
163
+ {"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"}
164
+ ]
165
+ },
166
+ "kotoba-speech-jacob": {
167
+ "data": [
168
+ text,
169
+ 5,
170
+ 5,
171
+ "Preset voices",
172
+ "Jacob",
173
+ {"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"},
174
+ {"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"}
175
+ ]
176
+ },
177
  "blane-tts": {
178
  "data": [
179
  text,
 
385
  # 'speecht5': 'SpeechT5',
386
  # 'metavoice': 'MetaVoice-1B',
387
  'bark': 'BARK',
388
+ 'moe-vits': 'MOE-VITS',
389
+ 'kotoba-speech-ava': 'KOTOBA-SPEECH-v0.1-AVA',
390
+ 'kotoba-speech-bria': 'KOTOBA-SPEECH-v0.1-BRIA',
391
+ 'kotoba-speech-alex': 'KOTOBA-SPEECH-v0.1-ALEX',
392
+ 'kotoba-speech-jacob': 'KOTOBA-SPEECH-v0.1-JACOB',
393
  'blane-tts': 'BLANE-TTS'
394
  # 'styletts2': 'StyleTTS 2',
395
  }
 
440
  # 'speecht5': 'https://github.com/microsoft/SpeechT5',
441
  # 'metavoice': 'https://github.com/metavoiceio/metavoice-src',
442
  'bark': 'https://suno-bark.hf.space/',
443
+ 'moe-vits': 'skytnt/moe-tts',
444
+ 'kotoba-speech-ava': 'https://kotoba-tech-kotoba-speech.hf.space/',
445
+ 'kotoba-speech-bria': 'https://kotoba-tech-kotoba-speech.hf.space/',
446
+ 'kotoba-speech-alex': 'https://kotoba-tech-kotoba-speech.hf.space/',
447
+ 'kotoba-speech-jacob': 'https://kotoba-tech-kotoba-speech.hf.space/',
448
  'blane-tts': 'https://blane187-blane-tts.hf.space/'
449
  }
450
  model_kwargs = {
451
+ 'moe-vits': {
452
  'api_name': '/tts_fn_7'
453
  },
454
  'bark': {
 
717
  # if model in AVAILABLE_MODELS:
718
  if model in model_names:
719
  model_args = {
720
+ 'moe-vits': (
721
  text,
722
  ),
723
  'bark': (
724
  text,
725
+ f'Speaker {random.choice(range(10))} (ja)',
726
  ),
727
  }
728
  # result = router.predict(text, AVAILABLE_MODELS[model].lower(), api_name="/synthesize")
729
  if model in model_kwargs:
730
  router = Client(model_links[model])
731
+ print(model_args[model])
732
+ print(model_kwargs[model])
733
  result = router.predict(*model_args[model], **model_kwargs[model])
734
  else:
735
  result = get_tts_file(text, model)
 
757
  # api_name="/process"
758
  # )[0]
759
  raise NotImplementedError
760
+
 
761
  except:
762
  raise gr.Error('Unable to call API, please try again :)')
763
  print('Done with', model)
764
+ if model in {"moe-vits"}:
765
  result = result[1]
766
  # try:
767
  # doresample(result)
 
958
 
959
  with gr.Blocks() as about:
960
  gr.Markdown(ABOUT)
961
+ with gr.Blocks() as admin:
962
+ rdb = gr.Button("Reload Audio Dataset")
963
+ # rdb.click(reload_audio_dataset, outputs=rdb)
964
+ with gr.Group():
965
+ dbtext = gr.Textbox(label="Type \"delete db\" to confirm", placeholder="delete db")
966
+ ddb = gr.Button("Delete DB")
967
+ ddb.click(del_db, inputs=dbtext, outputs=ddb)
968
  with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none}", title="JA TTS Arena") as demo:
969
  gr.Markdown(DESCR)
970
+ gr.TabbedInterface([vote, leaderboard, about, admin], ['Vote', 'Leaderboard', 'About', 'Admin (ONLY IN BETA)'])
971
+ # gr.TabbedInterface([vote, leaderboard, about], ['🗳️ Vote', '🏆 Leaderboard', '📄 About'])
972
  if CITATION_TEXT:
973
  with gr.Row():
974
  with gr.Accordion("Citation", open=False):
975
  gr.Markdown(f"If you use this data in your publication, please cite us!\n\nCopy the BibTeX citation to cite this source:\n\n```bibtext\n{CITATION_TEXT}\n```\n\nPlease remember that all generated audio clips should be assumed unsuitable for redistribution or commercial use.")
976
 
977
+ demo.launch()
978
+ # demo.queue(api_open=False, default_concurrency_limit=40).launch(show_api=False)