TTS-Arena-JA

Paused

App Files Community

alan committed on Jul 19, 2024

Commit

b78425c

1 Parent(s): f311f6c

spkers

Browse files

Files changed (1) hide show

app.py +71 -25

app.py CHANGED Viewed

@@ -48,9 +48,12 @@ AVAILABLE_MODELS = {
     # # 'Vokan TTS': 'vokan',
     # 'VoiceCraft 2.0': 'voicecraft',
     # 'Parler TTS': 'parler'
-    'MOE': 'moe',
     'BARK': 'bark',
-    'KOTOBA-SPEECH': 'kotoba-speech',
     'BLANE-TTS': 'blane-tts'
 }
@@ -117,14 +120,17 @@ def get_db():
 def get_tts_file(text: str, model: str):
     url = {
-        "kotoba-speech": "https://kotoba-tech-kotoba-speech.hf.space/call/tts",
         "blane-tts": "https://blane187-blane-tts.hf.space/call/get_audio_file"
     }
     headers = {
         "Content-Type": "application/json"
     }
     data = {
-        "kotoba-speech": {
             "data": [
                 text,
                 5,
@@ -135,6 +141,39 @@ def get_tts_file(text: str, model: str):
                 {"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"}
             ]
         },
         "blane-tts": {
             "data": [
                 text,
@@ -346,8 +385,11 @@ model_names = {
     # 'speecht5': 'SpeechT5',
     # 'metavoice': 'MetaVoice-1B',
     'bark': 'BARK',
-    'moe': 'MOE',
-    'kotoba-speech': 'KOTOBA-SPEECH',
     'blane-tts': 'BLANE-TTS'
     # 'styletts2': 'StyleTTS 2',
 }
@@ -398,12 +440,15 @@ model_links = {
     # 'speecht5': 'https://github.com/microsoft/SpeechT5',
     # 'metavoice': 'https://github.com/metavoiceio/metavoice-src',
     'bark': 'https://suno-bark.hf.space/',
-    'moe': 'skytnt/moe-tts',
-    'kotoba-speech': 'https://kotoba-tech-kotoba-speech.hf.space/',
     'blane-tts': 'https://blane187-blane-tts.hf.space/'
 }
 model_kwargs = {
-    'moe': {
         'api_name': '/tts_fn_7'
     },
     'bark': {
@@ -672,17 +717,19 @@ def synthandreturn(text):
             # if model in AVAILABLE_MODELS:
             if model in model_names:
                 model_args = {
-                    'moe': (
                         text,
                     ),
                     'bark': (
                         text,
-                        'Speaker 0 (ja)',
                     ),
                 }
                 # result = router.predict(text, AVAILABLE_MODELS[model].lower(), api_name="/synthesize")
                 if model in model_kwargs:
                     router = Client(model_links[model])
                     result = router.predict(*model_args[model], **model_kwargs[model])
                 else:
                     result = get_tts_file(text, model)
@@ -710,12 +757,11 @@ def synthandreturn(text):
                 #     api_name="/process"
                 # )[0]
                 raise NotImplementedError
-                router = Client("skytnt/moe-tts")
-                result = router.predict(text_150_words_limitation=text, api_name="/tts_fn_7")[1]
         except:
             raise gr.Error('Unable to call API, please try again :)')
         print('Done with', model)
-        if model in {"moe"}:
             result = result[1]
         # try:
         #     doresample(result)
@@ -912,21 +958,21 @@ with gr.Blocks() as vote:
 with gr.Blocks() as about:
     gr.Markdown(ABOUT)
-# with gr.Blocks() as admin:
-#     rdb = gr.Button("Reload Audio Dataset")
-#     # rdb.click(reload_audio_dataset, outputs=rdb)
-#     with gr.Group():
-#         dbtext = gr.Textbox(label="Type \"delete db\" to confirm", placeholder="delete db")
-#         ddb = gr.Button("Delete DB")
-#     ddb.click(del_db, inputs=dbtext, outputs=ddb)
 with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none}", title="JA TTS Arena") as demo:
     gr.Markdown(DESCR)
-    # gr.TabbedInterface([vote, leaderboard, about, admin], ['Vote', 'Leaderboard', 'About', 'Admin (ONLY IN BETA)'])
-    gr.TabbedInterface([vote, leaderboard, about], ['🗳️ Vote', '🏆 Leaderboard', '📄 About'])
     if CITATION_TEXT:
         with gr.Row():
             with gr.Accordion("Citation", open=False):
                 gr.Markdown(f"If you use this data in your publication, please cite us!\n\nCopy the BibTeX citation to cite this source:\n\n```bibtext\n{CITATION_TEXT}\n```\n\nPlease remember that all generated audio clips should be assumed unsuitable for redistribution or commercial use.")
-demo.queue(api_open=False, default_concurrency_limit=40).launch(show_api=False)

     # # 'Vokan TTS': 'vokan',
     # 'VoiceCraft 2.0': 'voicecraft',
     # 'Parler TTS': 'parler'
+    'MOE-VITS': 'moe-vits',
     'BARK': 'bark',
+    'KOTOBA-SPEECH-AVA': 'kotoba-speech-ava',
+    'KOTOBA-SPEECH-BRIA': 'kotoba-speech-bria',
+    'KOTOBA-SPEECH-ALEX': 'kotoba-speech-alex',
+    'KOTOBA-SPEECH-JACOB': 'kotoba-speech-jacob',
     'BLANE-TTS': 'blane-tts'
 }
 def get_tts_file(text: str, model: str):
     url = {
+        "kotoba-speech-ava": "https://kotoba-tech-kotoba-speech.hf.space/call/tts",
+        "kotoba-speech-bria": "https://kotoba-tech-kotoba-speech.hf.space/call/tts",
+        "kotoba-speech-alex": "https://kotoba-tech-kotoba-speech.hf.space/call/tts",
+        "kotoba-speech-jacob": "https://kotoba-tech-kotoba-speech.hf.space/call/tts",
         "blane-tts": "https://blane187-blane-tts.hf.space/call/get_audio_file"
     }
     headers = {
         "Content-Type": "application/json"
     }
     data = {
+        "kotoba-speech-ava": {
             "data": [
                 text,
                 5,
                 {"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"}
             ]
         },
+        "kotoba-speech-bria": {
+            "data": [
+                text,
+                5,
+                5,
+                "Preset voices",
+                "Bria",
+                {"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"},
+                {"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"}
+            ]
+        },
+        "kotoba-speech-alex": {
+            "data": [
+                text,
+                5,
+                5,
+                "Preset voices",
+                "Alex",
+                {"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"},
+                {"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"}
+            ]
+        },
+        "kotoba-speech-jacob": {
+            "data": [
+                text,
+                5,
+                5,
+                "Preset voices",
+                "Jacob",
+                {"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"},
+                {"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"}
+            ]
+        },
         "blane-tts": {
             "data": [
                 text,
     # 'speecht5': 'SpeechT5',
     # 'metavoice': 'MetaVoice-1B',
     'bark': 'BARK',
+    'moe-vits': 'MOE-VITS',
+    'kotoba-speech-ava': 'KOTOBA-SPEECH-v0.1-AVA',
+    'kotoba-speech-bria': 'KOTOBA-SPEECH-v0.1-BRIA',
+    'kotoba-speech-alex': 'KOTOBA-SPEECH-v0.1-ALEX',
+    'kotoba-speech-jacob': 'KOTOBA-SPEECH-v0.1-JACOB',
     'blane-tts': 'BLANE-TTS'
     # 'styletts2': 'StyleTTS 2',
 }
     # 'speecht5': 'https://github.com/microsoft/SpeechT5',
     # 'metavoice': 'https://github.com/metavoiceio/metavoice-src',
     'bark': 'https://suno-bark.hf.space/',
+    'moe-vits': 'skytnt/moe-tts',
+    'kotoba-speech-ava': 'https://kotoba-tech-kotoba-speech.hf.space/',
+    'kotoba-speech-bria': 'https://kotoba-tech-kotoba-speech.hf.space/',
+    'kotoba-speech-alex': 'https://kotoba-tech-kotoba-speech.hf.space/',
+    'kotoba-speech-jacob': 'https://kotoba-tech-kotoba-speech.hf.space/',
     'blane-tts': 'https://blane187-blane-tts.hf.space/'
 }
 model_kwargs = {
+    'moe-vits': {
         'api_name': '/tts_fn_7'
     },
     'bark': {
             # if model in AVAILABLE_MODELS:
             if model in model_names:
                 model_args = {
+                    'moe-vits': (
                         text,
                     ),
                     'bark': (
                         text,
+                        f'Speaker {random.choice(range(10))} (ja)',
                     ),
                 }
                 # result = router.predict(text, AVAILABLE_MODELS[model].lower(), api_name="/synthesize")
                 if model in model_kwargs:
                     router = Client(model_links[model])
+                    print(model_args[model])
+                    print(model_kwargs[model])
                     result = router.predict(*model_args[model], **model_kwargs[model])
                 else:
                     result = get_tts_file(text, model)
                 #     api_name="/process"
                 # )[0]
                 raise NotImplementedError
         except:
             raise gr.Error('Unable to call API, please try again :)')
         print('Done with', model)
+        if model in {"moe-vits"}:
             result = result[1]
         # try:
         #     doresample(result)
 with gr.Blocks() as about:
     gr.Markdown(ABOUT)
+with gr.Blocks() as admin:
+    rdb = gr.Button("Reload Audio Dataset")
+    # rdb.click(reload_audio_dataset, outputs=rdb)
+    with gr.Group():
+        dbtext = gr.Textbox(label="Type \"delete db\" to confirm", placeholder="delete db")
+        ddb = gr.Button("Delete DB")
+    ddb.click(del_db, inputs=dbtext, outputs=ddb)
 with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none}", title="JA TTS Arena") as demo:
     gr.Markdown(DESCR)
+    gr.TabbedInterface([vote, leaderboard, about, admin], ['Vote', 'Leaderboard', 'About', 'Admin (ONLY IN BETA)'])
+    # gr.TabbedInterface([vote, leaderboard, about], ['🗳️ Vote', '🏆 Leaderboard', '📄 About'])
     if CITATION_TEXT:
         with gr.Row():
             with gr.Accordion("Citation", open=False):
                 gr.Markdown(f"If you use this data in your publication, please cite us!\n\nCopy the BibTeX citation to cite this source:\n\n```bibtext\n{CITATION_TEXT}\n```\n\nPlease remember that all generated audio clips should be assumed unsuitable for redistribution or commercial use.")
+demo.launch()
+# demo.queue(api_open=False, default_concurrency_limit=40).launch(show_api=False)