Nekochu committed on
Commit
9d04583
·
1 Parent(s): 5dedf2e

fix: 5h fixed timeout per file, check total feasibility first

Browse files
Files changed (1) hide show
  1. app.py +13 -14
app.py CHANGED
@@ -661,16 +661,15 @@ def gradio_main():
661
  yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
662
 
663
  if audio_to_caption and use_lm_caption and _server_ok():
664
- # --- Mode: GGUF LM captioning (slow, best quality) ---
665
- # 5h total budget — check if feasible first
666
  LM_TIMEOUT = 18000 # 5h per file
667
  est_total = int(total_dur * 7 + len(audio_to_caption) * 600)
668
  if est_total > LM_TIMEOUT:
669
- _log(f"[WARN] Estimated {est_total // 60} min for LM captioning "
670
- f"— exceeds 5h, switching to fast captioning")
671
  use_lm_caption = False
 
672
  else:
673
- _log(f"[INFO] LM captioning {len(audio_to_caption)} files...")
674
  yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
675
  for audio_fname, full_path, sidecar_json in audio_to_caption:
676
  if _training_cancel.is_set():
@@ -681,15 +680,15 @@ def gradio_main():
681
  full_path, timeout=LM_TIMEOUT,
682
  cancel_check=lambda: _training_cancel.is_set(),
683
  )
684
- if caption_data:
685
- bpm_s = caption_data.get("bpm", "?")
686
- key_s = caption_data.get("keyscale", caption_data.get("key", "?"))
687
- _log(f" {audio_fname}: OK (BPM={bpm_s}, key={key_s})")
688
- with open(sidecar_json, "w") as cj:
689
- json.dump(caption_data, cj)
690
- else:
691
- _log(f" {audio_fname}: LM failed, will use fast captioning")
692
- yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
693
 
694
  if audio_to_caption and not use_lm_caption:
695
  # --- Mode: Fast captioning (CLAP + Whisper + librosa) ---
 
661
  yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
662
 
663
  if audio_to_caption and use_lm_caption and _server_ok():
664
+ # --- Mode: GGUF LM captioning (best quality, 5h timeout per file) ---
 
665
  LM_TIMEOUT = 18000 # 5h per file
666
  est_total = int(total_dur * 7 + len(audio_to_caption) * 600)
667
  if est_total > LM_TIMEOUT:
668
+ _log(f"[WARN] Estimated {est_total // 60} min exceeds 5h, switching to fast captioning")
 
669
  use_lm_caption = False
670
+ yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
671
  else:
672
+ _log(f"[INFO] LM captioning {len(audio_to_caption)} files (5h timeout per file)...")
673
  yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
674
  for audio_fname, full_path, sidecar_json in audio_to_caption:
675
  if _training_cancel.is_set():
 
680
  full_path, timeout=LM_TIMEOUT,
681
  cancel_check=lambda: _training_cancel.is_set(),
682
  )
683
+ if caption_data:
684
+ bpm_s = caption_data.get("bpm", "?")
685
+ key_s = caption_data.get("keyscale", caption_data.get("key", "?"))
686
+ _log(f" {audio_fname}: OK (BPM={bpm_s}, key={key_s})")
687
+ with open(sidecar_json, "w") as cj:
688
+ json.dump(caption_data, cj)
689
+ else:
690
+ _log(f" {audio_fname}: LM failed")
691
+ yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
692
 
693
  if audio_to_caption and not use_lm_caption:
694
  # --- Mode: Fast captioning (CLAP + Whisper + librosa) ---