Spaces:
Paused
Paused
alan
committed on
Commit
·
b78425c
1
Parent(s):
f311f6c
spkers
Browse files
app.py
CHANGED
|
@@ -48,9 +48,12 @@ AVAILABLE_MODELS = {
|
|
| 48 |
# # 'Vokan TTS': 'vokan',
|
| 49 |
# 'VoiceCraft 2.0': 'voicecraft',
|
| 50 |
# 'Parler TTS': 'parler'
|
| 51 |
-
'MOE': 'moe',
|
| 52 |
'BARK': 'bark',
|
| 53 |
-
'KOTOBA-SPEECH': 'kotoba-speech',
|
|
|
|
|
|
|
|
|
|
| 54 |
'BLANE-TTS': 'blane-tts'
|
| 55 |
}
|
| 56 |
|
|
@@ -117,14 +120,17 @@ def get_db():
|
|
| 117 |
|
| 118 |
def get_tts_file(text: str, model: str):
|
| 119 |
url = {
|
| 120 |
-
"kotoba-speech": "https://kotoba-tech-kotoba-speech.hf.space/call/tts",
|
|
|
|
|
|
|
|
|
|
| 121 |
"blane-tts": "https://blane187-blane-tts.hf.space/call/get_audio_file"
|
| 122 |
}
|
| 123 |
headers = {
|
| 124 |
"Content-Type": "application/json"
|
| 125 |
}
|
| 126 |
data = {
|
| 127 |
-
"kotoba-speech": {
|
| 128 |
"data": [
|
| 129 |
text,
|
| 130 |
5,
|
|
@@ -135,6 +141,39 @@ def get_tts_file(text: str, model: str):
|
|
| 135 |
{"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"}
|
| 136 |
]
|
| 137 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
"blane-tts": {
|
| 139 |
"data": [
|
| 140 |
text,
|
|
@@ -346,8 +385,11 @@ model_names = {
|
|
| 346 |
# 'speecht5': 'SpeechT5',
|
| 347 |
# 'metavoice': 'MetaVoice-1B',
|
| 348 |
'bark': 'BARK',
|
| 349 |
-
'moe': 'MOE',
|
| 350 |
-
'kotoba-speech': 'KOTOBA-SPEECH',
|
|
|
|
|
|
|
|
|
|
| 351 |
'blane-tts': 'BLANE-TTS'
|
| 352 |
# 'styletts2': 'StyleTTS 2',
|
| 353 |
}
|
|
@@ -398,12 +440,15 @@ model_links = {
|
|
| 398 |
# 'speecht5': 'https://github.com/microsoft/SpeechT5',
|
| 399 |
# 'metavoice': 'https://github.com/metavoiceio/metavoice-src',
|
| 400 |
'bark': 'https://suno-bark.hf.space/',
|
| 401 |
-
'moe': 'skytnt/moe-tts',
|
| 402 |
-
'kotoba-speech': 'https://kotoba-tech-kotoba-speech.hf.space/',
|
|
|
|
|
|
|
|
|
|
| 403 |
'blane-tts': 'https://blane187-blane-tts.hf.space/'
|
| 404 |
}
|
| 405 |
model_kwargs = {
|
| 406 |
-
'moe': {
|
| 407 |
'api_name': '/tts_fn_7'
|
| 408 |
},
|
| 409 |
'bark': {
|
|
@@ -672,17 +717,19 @@ def synthandreturn(text):
|
|
| 672 |
# if model in AVAILABLE_MODELS:
|
| 673 |
if model in model_names:
|
| 674 |
model_args = {
|
| 675 |
-
'moe': (
|
| 676 |
text,
|
| 677 |
),
|
| 678 |
'bark': (
|
| 679 |
text,
|
| 680 |
-
'Speaker
|
| 681 |
),
|
| 682 |
}
|
| 683 |
# result = router.predict(text, AVAILABLE_MODELS[model].lower(), api_name="/synthesize")
|
| 684 |
if model in model_kwargs:
|
| 685 |
router = Client(model_links[model])
|
|
|
|
|
|
|
| 686 |
result = router.predict(*model_args[model], **model_kwargs[model])
|
| 687 |
else:
|
| 688 |
result = get_tts_file(text, model)
|
|
@@ -710,12 +757,11 @@ def synthandreturn(text):
|
|
| 710 |
# api_name="/process"
|
| 711 |
# )[0]
|
| 712 |
raise NotImplementedError
|
| 713 |
-
|
| 714 |
-
result = router.predict(text_150_words_limitation=text, api_name="/tts_fn_7")[1]
|
| 715 |
except:
|
| 716 |
raise gr.Error('Unable to call API, please try again :)')
|
| 717 |
print('Done with', model)
|
| 718 |
-
if model in {"moe"}:
|
| 719 |
result = result[1]
|
| 720 |
# try:
|
| 721 |
# doresample(result)
|
|
@@ -912,21 +958,21 @@ with gr.Blocks() as vote:
|
|
| 912 |
|
| 913 |
with gr.Blocks() as about:
|
| 914 |
gr.Markdown(ABOUT)
|
| 915 |
-
|
| 916 |
-
|
| 917 |
-
#
|
| 918 |
-
|
| 919 |
-
|
| 920 |
-
|
| 921 |
-
|
| 922 |
with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none}", title="JA TTS Arena") as demo:
|
| 923 |
gr.Markdown(DESCR)
|
| 924 |
-
|
| 925 |
-
gr.TabbedInterface([vote, leaderboard, about], ['🗳️ Vote', '🏆 Leaderboard', '📄 About'])
|
| 926 |
if CITATION_TEXT:
|
| 927 |
with gr.Row():
|
| 928 |
with gr.Accordion("Citation", open=False):
|
| 929 |
gr.Markdown(f"If you use this data in your publication, please cite us!\n\nCopy the BibTeX citation to cite this source:\n\n```bibtext\n{CITATION_TEXT}\n```\n\nPlease remember that all generated audio clips should be assumed unsuitable for redistribution or commercial use.")
|
| 930 |
|
| 931 |
-
|
| 932 |
-
demo.queue(api_open=False, default_concurrency_limit=40).launch(show_api=False)
|
|
|
|
| 48 |
# # 'Vokan TTS': 'vokan',
|
| 49 |
# 'VoiceCraft 2.0': 'voicecraft',
|
| 50 |
# 'Parler TTS': 'parler'
|
| 51 |
+
'MOE-VITS': 'moe-vits',
|
| 52 |
'BARK': 'bark',
|
| 53 |
+
'KOTOBA-SPEECH-AVA': 'kotoba-speech-ava',
|
| 54 |
+
'KOTOBA-SPEECH-BRIA': 'kotoba-speech-bria',
|
| 55 |
+
'KOTOBA-SPEECH-ALEX': 'kotoba-speech-alex',
|
| 56 |
+
'KOTOBA-SPEECH-JACOB': 'kotoba-speech-jacob',
|
| 57 |
'BLANE-TTS': 'blane-tts'
|
| 58 |
}
|
| 59 |
|
|
|
|
| 120 |
|
| 121 |
def get_tts_file(text: str, model: str):
|
| 122 |
url = {
|
| 123 |
+
"kotoba-speech-ava": "https://kotoba-tech-kotoba-speech.hf.space/call/tts",
|
| 124 |
+
"kotoba-speech-bria": "https://kotoba-tech-kotoba-speech.hf.space/call/tts",
|
| 125 |
+
"kotoba-speech-alex": "https://kotoba-tech-kotoba-speech.hf.space/call/tts",
|
| 126 |
+
"kotoba-speech-jacob": "https://kotoba-tech-kotoba-speech.hf.space/call/tts",
|
| 127 |
"blane-tts": "https://blane187-blane-tts.hf.space/call/get_audio_file"
|
| 128 |
}
|
| 129 |
headers = {
|
| 130 |
"Content-Type": "application/json"
|
| 131 |
}
|
| 132 |
data = {
|
| 133 |
+
"kotoba-speech-ava": {
|
| 134 |
"data": [
|
| 135 |
text,
|
| 136 |
5,
|
|
|
|
| 141 |
{"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"}
|
| 142 |
]
|
| 143 |
},
|
| 144 |
+
"kotoba-speech-bria": {
|
| 145 |
+
"data": [
|
| 146 |
+
text,
|
| 147 |
+
5,
|
| 148 |
+
5,
|
| 149 |
+
"Preset voices",
|
| 150 |
+
"Bria",
|
| 151 |
+
{"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"},
|
| 152 |
+
{"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"}
|
| 153 |
+
]
|
| 154 |
+
},
|
| 155 |
+
"kotoba-speech-alex": {
|
| 156 |
+
"data": [
|
| 157 |
+
text,
|
| 158 |
+
5,
|
| 159 |
+
5,
|
| 160 |
+
"Preset voices",
|
| 161 |
+
"Alex",
|
| 162 |
+
{"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"},
|
| 163 |
+
{"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"}
|
| 164 |
+
]
|
| 165 |
+
},
|
| 166 |
+
"kotoba-speech-jacob": {
|
| 167 |
+
"data": [
|
| 168 |
+
text,
|
| 169 |
+
5,
|
| 170 |
+
5,
|
| 171 |
+
"Preset voices",
|
| 172 |
+
"Jacob",
|
| 173 |
+
{"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"},
|
| 174 |
+
{"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"}
|
| 175 |
+
]
|
| 176 |
+
},
|
| 177 |
"blane-tts": {
|
| 178 |
"data": [
|
| 179 |
text,
|
|
|
|
| 385 |
# 'speecht5': 'SpeechT5',
|
| 386 |
# 'metavoice': 'MetaVoice-1B',
|
| 387 |
'bark': 'BARK',
|
| 388 |
+
'moe-vits': 'MOE-VITS',
|
| 389 |
+
'kotoba-speech-ava': 'KOTOBA-SPEECH-v0.1-AVA',
|
| 390 |
+
'kotoba-speech-bria': 'KOTOBA-SPEECH-v0.1-BRIA',
|
| 391 |
+
'kotoba-speech-alex': 'KOTOBA-SPEECH-v0.1-ALEX',
|
| 392 |
+
'kotoba-speech-jacob': 'KOTOBA-SPEECH-v0.1-JACOB',
|
| 393 |
'blane-tts': 'BLANE-TTS'
|
| 394 |
# 'styletts2': 'StyleTTS 2',
|
| 395 |
}
|
|
|
|
| 440 |
# 'speecht5': 'https://github.com/microsoft/SpeechT5',
|
| 441 |
# 'metavoice': 'https://github.com/metavoiceio/metavoice-src',
|
| 442 |
'bark': 'https://suno-bark.hf.space/',
|
| 443 |
+
'moe-vits': 'skytnt/moe-tts',
|
| 444 |
+
'kotoba-speech-ava': 'https://kotoba-tech-kotoba-speech.hf.space/',
|
| 445 |
+
'kotoba-speech-bria': 'https://kotoba-tech-kotoba-speech.hf.space/',
|
| 446 |
+
'kotoba-speech-alex': 'https://kotoba-tech-kotoba-speech.hf.space/',
|
| 447 |
+
'kotoba-speech-jacob': 'https://kotoba-tech-kotoba-speech.hf.space/',
|
| 448 |
'blane-tts': 'https://blane187-blane-tts.hf.space/'
|
| 449 |
}
|
| 450 |
model_kwargs = {
|
| 451 |
+
'moe-vits': {
|
| 452 |
'api_name': '/tts_fn_7'
|
| 453 |
},
|
| 454 |
'bark': {
|
|
|
|
| 717 |
# if model in AVAILABLE_MODELS:
|
| 718 |
if model in model_names:
|
| 719 |
model_args = {
|
| 720 |
+
'moe-vits': (
|
| 721 |
text,
|
| 722 |
),
|
| 723 |
'bark': (
|
| 724 |
text,
|
| 725 |
+
f'Speaker {random.choice(range(10))} (ja)',
|
| 726 |
),
|
| 727 |
}
|
| 728 |
# result = router.predict(text, AVAILABLE_MODELS[model].lower(), api_name="/synthesize")
|
| 729 |
if model in model_kwargs:
|
| 730 |
router = Client(model_links[model])
|
| 731 |
+
print(model_args[model])
|
| 732 |
+
print(model_kwargs[model])
|
| 733 |
result = router.predict(*model_args[model], **model_kwargs[model])
|
| 734 |
else:
|
| 735 |
result = get_tts_file(text, model)
|
|
|
|
| 757 |
# api_name="/process"
|
| 758 |
# )[0]
|
| 759 |
raise NotImplementedError
|
| 760 |
+
|
|
|
|
| 761 |
except:
|
| 762 |
raise gr.Error('Unable to call API, please try again :)')
|
| 763 |
print('Done with', model)
|
| 764 |
+
if model in {"moe-vits"}:
|
| 765 |
result = result[1]
|
| 766 |
# try:
|
| 767 |
# doresample(result)
|
|
|
|
| 958 |
|
| 959 |
with gr.Blocks() as about:
|
| 960 |
gr.Markdown(ABOUT)
|
| 961 |
+
with gr.Blocks() as admin:
|
| 962 |
+
rdb = gr.Button("Reload Audio Dataset")
|
| 963 |
+
# rdb.click(reload_audio_dataset, outputs=rdb)
|
| 964 |
+
with gr.Group():
|
| 965 |
+
dbtext = gr.Textbox(label="Type \"delete db\" to confirm", placeholder="delete db")
|
| 966 |
+
ddb = gr.Button("Delete DB")
|
| 967 |
+
ddb.click(del_db, inputs=dbtext, outputs=ddb)
|
| 968 |
with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none}", title="JA TTS Arena") as demo:
|
| 969 |
gr.Markdown(DESCR)
|
| 970 |
+
gr.TabbedInterface([vote, leaderboard, about, admin], ['Vote', 'Leaderboard', 'About', 'Admin (ONLY IN BETA)'])
|
| 971 |
+
# gr.TabbedInterface([vote, leaderboard, about], ['🗳️ Vote', '🏆 Leaderboard', '📄 About'])
|
| 972 |
if CITATION_TEXT:
|
| 973 |
with gr.Row():
|
| 974 |
with gr.Accordion("Citation", open=False):
|
| 975 |
gr.Markdown(f"If you use this data in your publication, please cite us!\n\nCopy the BibTeX citation to cite this source:\n\n```bibtext\n{CITATION_TEXT}\n```\n\nPlease remember that all generated audio clips should be assumed unsuitable for redistribution or commercial use.")
|
| 976 |
|
| 977 |
+
demo.launch()
|
| 978 |
+
# demo.queue(api_open=False, default_concurrency_limit=40).launch(show_api=False)
|