Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -60,7 +60,7 @@ print(f"Using device: {device}, dtype: {dtype}")
|
|
60 |
|
61 |
pipe = pipeline(
|
62 |
"automatic-speech-recognition",
|
63 |
-
model="
|
64 |
torch_dtype=torch.float16,
|
65 |
device=device,
|
66 |
)
|
@@ -110,8 +110,8 @@ def load_custom(ckpt_path: str, vocab_path="", model_cfg=None):
|
|
110 |
return load_model(DiT, model_cfg, ckpt_path, vocab_file=vocab_path)
|
111 |
|
112 |
|
113 |
-
F2TTS_ema_model3 = load_f5tts()
|
114 |
-
E2TTS_ema_model4 = load_e2tts() if USING_SPACES else None
|
115 |
custom_ema_model, pre_custom_path = None, ""
|
116 |
|
117 |
chat_model_state = None
|
@@ -212,7 +212,7 @@ def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence,
|
|
212 |
gen_text_len = len(gen_text.encode('utf-8')) + 3 * len(re.findall(zh_pause_punc, gen_text))
|
213 |
|
214 |
# Calculate duration based on the lengths of ref_text and gen_text
|
215 |
-
duration = min(2000, max(270, int(
|
216 |
|
217 |
# Print the calculated duration
|
218 |
print(f"Duration: {duration} seconds")
|
|
|
60 |
|
61 |
pipe = pipeline(
|
62 |
"automatic-speech-recognition",
|
63 |
+
model="openai/whisper-large-v3-turbo",
|
64 |
torch_dtype=torch.float16,
|
65 |
device=device,
|
66 |
)
|
|
|
110 |
return load_model(DiT, model_cfg, ckpt_path, vocab_file=vocab_path)
|
111 |
|
112 |
|
113 |
+
#F2TTS_ema_model3 = load_f5tts()
|
114 |
+
#E2TTS_ema_model4 = load_e2tts() if USING_SPACES else None
|
115 |
custom_ema_model, pre_custom_path = None, ""
|
116 |
|
117 |
chat_model_state = None
|
|
|
212 |
gen_text_len = len(gen_text.encode('utf-8')) + 3 * len(re.findall(zh_pause_punc, gen_text))
|
213 |
|
214 |
# Calculate duration based on the lengths of ref_text and gen_text
|
215 |
+
duration = min(2000, max(270, int(ref_audio_len + (ref_audio_len / ref_text_len * gen_text_len / speed))))
|
216 |
|
217 |
# Print the calculated duration
|
218 |
print(f"Duration: {duration} seconds")
|