Spaces: Running on Zero
F5 TTS API nfe_slider param fix
Browse files
- app.py +7 -6
- test_tts_e2_f5_f5.py +1 -0
app.py
CHANGED
@@ -427,7 +427,8 @@ OVERRIDE_INPUTS = {
|
|
427 |
1: DEFAULT_VOICE_TRANSCRIPT, # transcript of sample (< 15 seconds required)
|
428 |
3: False, # cleanup silence
|
429 |
4: 0.15, #crossfade
|
430 |
-
5:
|
|
|
431 |
},
|
432 |
|
433 |
# IMS-Toucan
|
@@ -1135,7 +1136,7 @@ def synthandreturn(text, request: gr.Request):
|
|
1135 |
# assume the index is one of the first 9 return params
|
1136 |
return_audio_index = int(HF_SPACES[model]['return_audio_index'])
|
1137 |
endpoints = mdl_space.view_api(all_endpoints=True, print_info=False, return_format='dict')
|
1138 |
-
|
1139 |
api_name = None
|
1140 |
fn_index = None
|
1141 |
end_parameters = None
|
@@ -1143,7 +1144,7 @@ def synthandreturn(text, request: gr.Request):
|
|
1143 |
if '/' == HF_SPACES[model]['function'][0]:
|
1144 |
# audio sync function name
|
1145 |
api_name = HF_SPACES[model]['function']
|
1146 |
-
|
1147 |
end_parameters = _get_param_examples(
|
1148 |
endpoints['named_endpoints'][api_name]['parameters']
|
1149 |
)
|
@@ -1151,7 +1152,7 @@ def synthandreturn(text, request: gr.Request):
|
|
1151 |
else:
|
1152 |
# endpoint index is the first character
|
1153 |
fn_index = int(HF_SPACES[model]['function'])
|
1154 |
-
|
1155 |
end_parameters = _get_param_examples(
|
1156 |
endpoints['unnamed_endpoints'][str(fn_index)]['parameters']
|
1157 |
)
|
@@ -1248,7 +1249,7 @@ def synthandreturn(text, request: gr.Request):
|
|
1248 |
pass
|
1249 |
|
1250 |
return inputs
|
1251 |
-
|
1252 |
def _cache_sample(text, model):
|
1253 |
# skip caching if not hardcoded sentence
|
1254 |
if (text not in sents):
|
@@ -1310,7 +1311,7 @@ def synthandreturn(text, request: gr.Request):
|
|
1310 |
# cache the result
|
1311 |
for model in [mdl1k, mdl2k]:
|
1312 |
_cache_sample(text, model)
|
1313 |
-
|
1314 |
#debug
|
1315 |
# print(results)
|
1316 |
# print(list(results.keys())[0])
|
|
|
427 |
1: DEFAULT_VOICE_TRANSCRIPT, # transcript of sample (< 15 seconds required)
|
428 |
3: False, # cleanup silence
|
429 |
4: 0.15, #crossfade
|
430 |
+
5: 32, #nfe_slider
|
431 |
+
6: 1, #speed
|
432 |
},
|
433 |
|
434 |
# IMS-Toucan
|
|
|
1136 |
# assume the index is one of the first 9 return params
|
1137 |
return_audio_index = int(HF_SPACES[model]['return_audio_index'])
|
1138 |
endpoints = mdl_space.view_api(all_endpoints=True, print_info=False, return_format='dict')
|
1139 |
+
|
1140 |
api_name = None
|
1141 |
fn_index = None
|
1142 |
end_parameters = None
|
|
|
1144 |
if '/' == HF_SPACES[model]['function'][0]:
|
1145 |
# audio sync function name
|
1146 |
api_name = HF_SPACES[model]['function']
|
1147 |
+
|
1148 |
end_parameters = _get_param_examples(
|
1149 |
endpoints['named_endpoints'][api_name]['parameters']
|
1150 |
)
|
|
|
1152 |
else:
|
1153 |
# endpoint index is the first character
|
1154 |
fn_index = int(HF_SPACES[model]['function'])
|
1155 |
+
|
1156 |
end_parameters = _get_param_examples(
|
1157 |
endpoints['unnamed_endpoints'][str(fn_index)]['parameters']
|
1158 |
)
|
|
|
1249 |
pass
|
1250 |
|
1251 |
return inputs
|
1252 |
+
|
1253 |
def _cache_sample(text, model):
|
1254 |
# skip caching if not hardcoded sentence
|
1255 |
if (text not in sents):
|
|
|
1311 |
# cache the result
|
1312 |
for model in [mdl1k, mdl2k]:
|
1313 |
_cache_sample(text, model)
|
1314 |
+
|
1315 |
#debug
|
1316 |
# print(results)
|
1317 |
# print(list(results.keys())[0])
|
test_tts_e2_f5_f5.py
CHANGED
@@ -9,6 +9,7 @@ result = client.predict(
|
|
9 |
gen_text_input="Please surprise me and speak in whatever voice you enjoy.",
|
10 |
remove_silence=False,
|
11 |
cross_fade_duration_slider=0.15,
|
|
|
12 |
speed_slider=1,
|
13 |
api_name="/basic_tts",
|
14 |
)
|
|
|
9 |
gen_text_input="Please surprise me and speak in whatever voice you enjoy.",
|
10 |
remove_silence=False,
|
11 |
cross_fade_duration_slider=0.15,
|
12 |
+
nfe_slider=32,
|
13 |
speed_slider=1,
|
14 |
api_name="/basic_tts",
|
15 |
)
|