Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -182,10 +182,12 @@ def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence,
|
|
182 |
zh_pause_punc = r"。,、;:?!"
|
183 |
ref_text_len = len(ref_text.encode('utf-8')) + 3 * len(re.findall(zh_pause_punc, ref_text))
|
184 |
gen_text_len = len(gen_text.encode('utf-8')) + 3 * len(re.findall(zh_pause_punc, gen_text))
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
|
|
|
|
189 |
|
190 |
# inference
|
191 |
with torch.inference_mode():
|
@@ -738,9 +740,9 @@ This is a local web UI for F5 TTS with advanced batch processing support. This a
|
|
738 |
|
739 |
The checkpoint supports Polish, English, and German.
|
740 |
|
741 |
-
Generations using CPU takes usually 2-3 minutes
|
742 |
|
743 |
-
If you're having issues, try converting your reference audio to WAV or MP3, clipping it to
|
744 |
|
745 |
**NOTE: Reference text will be automatically transcribed with Whisper if not provided. For best results, keep your reference clips short (<15s). Ensure the audio is fully uploaded before generating.**
|
746 |
"""
|
|
|
182 |
zh_pause_punc = r"。,、;:?!"
|
183 |
ref_text_len = len(ref_text.encode('utf-8')) + 3 * len(re.findall(zh_pause_punc, ref_text))
|
184 |
gen_text_len = len(gen_text.encode('utf-8')) + 3 * len(re.findall(zh_pause_punc, gen_text))
|
185 |
+
if len(ref_text) >= 1:
|
186 |
+
duration = ref_audio_len + int(ref_audio_len / ref_text_len * gen_text_len / speed)
|
187 |
+
print(f"Duration: {duration} seconds")
|
188 |
+
else:
|
189 |
+
duration = min(5000, max(300, int(133 * gen_text_len / (speed * 10))))
|
190 |
+
print(f"Duration: {duration} seconds")
|
191 |
|
192 |
# inference
|
193 |
with torch.inference_mode():
|
|
|
740 |
|
741 |
The checkpoint supports Polish, English, and German.
|
742 |
|
743 |
+
Generation on CPU usually takes 2-3 minutes using 8-step inference.
|
744 |
|
745 |
+
If you're having issues, try converting your reference audio to WAV or MP3, clipping it to 5s, and shortening your prompt.
|
746 |
|
747 |
**NOTE: Reference text will be automatically transcribed with Whisper if not provided. For best results, keep your reference clips short (<15s). Ensure the audio is fully uploaded before generating.**
|
748 |
"""
|