Spaces:
Running
Running
give newcomer a cached sample pair; changed default voice clone for TTS
Browse files
README.md
CHANGED
@@ -9,7 +9,7 @@ emoji: 🤗🏆
|
|
9 |
colorFrom: red
|
10 |
colorTo: red
|
11 |
pinned: false
|
12 |
-
short_description:
|
13 |
models:
|
14 |
- coqui/XTTS-v2
|
15 |
- fishaudio/fish-speech-1.4
|
|
|
9 |
colorFrom: red
|
10 |
colorTo: red
|
11 |
pinned: false
|
12 |
+
short_description: Vote on the top HF TTS models!
|
13 |
models:
|
14 |
- coqui/XTTS-v2
|
15 |
- fishaudio/fish-speech-1.4
|
app.py
CHANGED
@@ -44,6 +44,9 @@ with open('harvard_sentences.txt') as f:
|
|
44 |
sents += f.read().strip().splitlines()
|
45 |
with open('llama3_command-r_sentences.txt') as f:
|
46 |
sents += f.read().strip().splitlines()
|
|
|
|
|
|
|
47 |
####################################
|
48 |
# Constants
|
49 |
####################################
|
@@ -213,8 +216,8 @@ DEFAULT_VOICE_TRANSCRIPT = "In the first half of the 20th century, science ficti
|
|
213 |
OVERRIDE_INPUTS = {
|
214 |
'coqui/xtts': {
|
215 |
1: 'en',
|
216 |
-
2:
|
217 |
-
3:
|
218 |
4: False, #use_mic
|
219 |
5: False, #cleanup_reference
|
220 |
6: False, #auto_detect
|
@@ -248,7 +251,7 @@ OVERRIDE_INPUTS = {
|
|
248 |
1: 'LikeManyWaters', # voice
|
249 |
},
|
250 |
'LeeSangHoon/HierSpeech_TTS': {
|
251 |
-
1:
|
252 |
2: 0.333,
|
253 |
3: 0.333,
|
254 |
4: 1,
|
@@ -267,6 +270,13 @@ OVERRIDE_INPUTS = {
|
|
267 |
2: 1, # speed
|
268 |
3: 'EN', # language
|
269 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
270 |
'parler-tts/parler_tts': {
|
271 |
1: 'Elisabeth. Elisabeth\'s clear sharp voice.', # description/prompt
|
272 |
},
|
@@ -438,13 +448,13 @@ INSTR = """
|
|
438 |
## 🗳️ Vote
|
439 |
|
440 |
* Press ⚡ to get cached sample pairs you've yet to vote on. (Fast 🐇)
|
441 |
-
* Or press 🎲 to randomly use
|
442 |
* Or input text (🇺🇸 English only) to synthesize audio. (Slowest 🐌 due to _Toxicity_ test)
|
443 |
* Listen to the two audio clips, one after the other.
|
444 |
-
*
|
445 |
-
*
|
446 |
|
447 |
-
Note: It may take up to 30 seconds to synthesize audio.
|
448 |
""".strip()
|
449 |
request = ''
|
450 |
if SPACE_ID:
|
@@ -1391,12 +1401,17 @@ with gr.Blocks() as vote:
|
|
1391 |
# bothbad.click(both_bad, outputs=outputs, inputs=[model1, model2, useridstate])
|
1392 |
# bothgood.click(both_good, outputs=outputs, inputs=[model1, model2, useridstate])
|
1393 |
|
1394 |
-
|
1395 |
-
|
1396 |
-
|
1397 |
-
|
1398 |
-
|
1399 |
-
|
|
|
|
|
|
|
|
|
|
|
1400 |
|
1401 |
with gr.Blocks() as about:
|
1402 |
gr.Markdown(ABOUT)
|
@@ -1407,6 +1422,7 @@ with gr.Blocks() as about:
|
|
1407 |
# dbtext = gr.Textbox(label="Type \"delete db\" to confirm", placeholder="delete db")
|
1408 |
# ddb = gr.Button("Delete DB")
|
1409 |
# ddb.click(del_db, inputs=dbtext, outputs=ddb)
|
|
|
1410 |
with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none} .blurred-text {filter: blur(0.15em);}", js="cookie.js", title="TTS Arena") as demo:
|
1411 |
gr.Markdown(DESCR)
|
1412 |
# gr.TabbedInterface([vote, leaderboard, about, admin], ['Vote', 'Leaderboard', 'About', 'Admin (ONLY IN BETA)'])
|
|
|
44 |
sents += f.read().strip().splitlines()
|
45 |
with open('llama3_command-r_sentences.txt') as f:
|
46 |
sents += f.read().strip().splitlines()
|
47 |
+
|
48 |
+
# Credit: llama3_command-r sentences were generated by user KingNish
|
49 |
+
|
50 |
####################################
|
51 |
# Constants
|
52 |
####################################
|
|
|
216 |
OVERRIDE_INPUTS = {
|
217 |
'coqui/xtts': {
|
218 |
1: 'en',
|
219 |
+
2: 'https://huggingface.co/spaces/coqui/xtts/resolve/main/examples/female.wav', # voice sample
|
220 |
+
3: 'https://huggingface.co/spaces/coqui/xtts/resolve/main/examples/female.wav', # mic voice sample
|
221 |
4: False, #use_mic
|
222 |
5: False, #cleanup_reference
|
223 |
6: False, #auto_detect
|
|
|
251 |
1: 'LikeManyWaters', # voice
|
252 |
},
|
253 |
'LeeSangHoon/HierSpeech_TTS': {
|
254 |
+
1: file('https://huggingface.co/spaces/LeeSangHoon/HierSpeech_TTS/resolve/main/example/female.wav'), # voice sample
|
255 |
2: 0.333,
|
256 |
3: 0.333,
|
257 |
4: 1,
|
|
|
270 |
2: 1, # speed
|
271 |
3: 'EN', # language
|
272 |
},
|
273 |
+
'mrfakename/MetaVoice-1B-v0.1': {
|
274 |
+
1: 5, # float (numeric value between 0.0 and 10.0) in 'Speech Stability - improves text following for a challenging speaker' Slider component
|
275 |
+
2: 5, # float (numeric value between 1.0 and 5.0) in 'Speaker similarity - How closely to match speaker identity and speech style.' Slider component
|
276 |
+
3: "Preset voices", # Literal['Preset voices', 'Upload target voice'] in 'Choose voice' Radio component
|
277 |
+
4: "Bria", # Literal['Bria', 'Alex', 'Jacob'] in 'Preset voices' Dropdown component
|
278 |
+
5: None, # filepath in 'Upload a clean sample to clone. Sample should contain 1 speaker, be between 30-90 seconds and not contain background noise.' Audio component
|
279 |
+
},
|
280 |
'parler-tts/parler_tts': {
|
281 |
1: 'Elisabeth. Elisabeth\'s clear sharp voice.', # description/prompt
|
282 |
},
|
|
|
448 |
## 🗳️ Vote
|
449 |
|
450 |
* Press ⚡ to get cached sample pairs you've yet to vote on. (Fast 🐇)
|
451 |
+
* Or press 🎲 to randomly use a sentence from the list. (Slow 🐢)
|
452 |
* Or input text (🇺🇸 English only) to synthesize audio. (Slowest 🐌 due to _Toxicity_ test)
|
453 |
* Listen to the two audio clips, one after the other.
|
454 |
+
* _Vote on which audio sounds more natural to you._
|
455 |
+
* Model names are revealed after the vote is cast.
|
456 |
|
457 |
+
⚠ Note: It **may take up to 30 seconds** to ***synthesize*** audio.
|
458 |
""".strip()
|
459 |
request = ''
|
460 |
if SPACE_ID:
|
|
|
1401 |
# bothbad.click(both_bad, outputs=outputs, inputs=[model1, model2, useridstate])
|
1402 |
# bothgood.click(both_good, outputs=outputs, inputs=[model1, model2, useridstate])
|
1403 |
|
1404 |
+
# get session cookie
|
1405 |
+
vote\
|
1406 |
+
.load(
|
1407 |
+
None,
|
1408 |
+
None,
|
1409 |
+
session_hash,
|
1410 |
+
js="() => { return getArenaCookie('session') }",
|
1411 |
+
)
|
1412 |
+
# give a cached sample pair to voter; .then() did not work here
|
1413 |
+
vote\
|
1414 |
+
.load(give_cached_sample, inputs=[session_hash], outputs=[*outputs, cachedt])
|
1415 |
|
1416 |
with gr.Blocks() as about:
|
1417 |
gr.Markdown(ABOUT)
|
|
|
1422 |
# dbtext = gr.Textbox(label="Type \"delete db\" to confirm", placeholder="delete db")
|
1423 |
# ddb = gr.Button("Delete DB")
|
1424 |
# ddb.click(del_db, inputs=dbtext, outputs=ddb)
|
1425 |
+
# Blur cached sample text so the voting user listens for mispronunciations instead of reading along
|
1426 |
with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none} .blurred-text {filter: blur(0.15em);}", js="cookie.js", title="TTS Arena") as demo:
|
1427 |
gr.Markdown(DESCR)
|
1428 |
# gr.TabbedInterface([vote, leaderboard, about, admin], ['Vote', 'Leaderboard', 'About', 'Admin (ONLY IN BETA)'])
|