Gregniuki committed on
Commit
cc95ac6
·
verified ·
1 Parent(s): 2a5272b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -60,7 +60,7 @@ print(f"Using device: {device}, dtype: {dtype}")
60
 
61
  pipe = pipeline(
62
  "automatic-speech-recognition",
63
- model="Aspik101/whisper-small-pl",
64
  torch_dtype=torch.float16,
65
  device=device,
66
  )
@@ -110,8 +110,8 @@ def load_custom(ckpt_path: str, vocab_path="", model_cfg=None):
110
  return load_model(DiT, model_cfg, ckpt_path, vocab_file=vocab_path)
111
 
112
 
113
- F2TTS_ema_model3 = load_f5tts()
114
- E2TTS_ema_model4 = load_e2tts() if USING_SPACES else None
115
  custom_ema_model, pre_custom_path = None, ""
116
 
117
  chat_model_state = None
@@ -212,7 +212,7 @@ def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence,
212
  gen_text_len = len(gen_text.encode('utf-8')) + 3 * len(re.findall(zh_pause_punc, gen_text))
213
 
214
  # Calculate duration based on the lengths of ref_text and gen_text
215
- duration = min(2000, max(270, int(0.75 * (ref_audio_len + ref_audio_len / ref_text_len * gen_text_len / speed))))
216
 
217
  # Print the calculated duration
218
  print(f"Duration: {duration} seconds")
 
60
 
61
  pipe = pipeline(
62
  "automatic-speech-recognition",
63
+ model="openai/whisper-large-v3-turbo",
64
  torch_dtype=torch.float16,
65
  device=device,
66
  )
 
110
  return load_model(DiT, model_cfg, ckpt_path, vocab_file=vocab_path)
111
 
112
 
113
+ #F2TTS_ema_model3 = load_f5tts()
114
+ #E2TTS_ema_model4 = load_e2tts() if USING_SPACES else None
115
  custom_ema_model, pre_custom_path = None, ""
116
 
117
  chat_model_state = None
 
212
  gen_text_len = len(gen_text.encode('utf-8')) + 3 * len(re.findall(zh_pause_punc, gen_text))
213
 
214
  # Calculate duration based on the lengths of ref_text and gen_text
215
+ duration = min(2000, max(270, int( (ref_audio_len + (ref_audio_len / ref_text_len * gen_text_len / speed)))))
216
 
217
  # Print the calculated duration
218
  print(f"Duration: {duration} seconds")