jefffffff9 Claude Sonnet 4.6 committed on
Commit
1958814
·
1 Parent(s): 8d7d9d8

Fix 'Model loading' stuck state: block in _do_asr until Whisper is ready

Browse files

_ensure_whisper_loaded() fires a background thread and returns immediately.
_do_asr checked _whisper_model is None and returned a loading message —
but nothing ever re-triggered the pipeline after the model finished loading,
so the UI stayed stuck on that message forever.

Fix: _wait_for_whisper(timeout=120) polls _whisper_model every 0.5s until
ready or timeout. _do_asr and handle_ask now call this instead of the
non-blocking _ensure_whisper_loaded(). On a cold start the first request
waits (showing a spinner in the Gradio component) and then runs normally
— no retry needed by the user.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +23 -8
app.py CHANGED
@@ -141,6 +141,23 @@ def _ensure_whisper_loaded():
141
  return _model_status
142
 
143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  def get_model_status() -> str:
145
  s = _ensure_whisper_loaded()
146
  if "ready" in s:
@@ -1269,10 +1286,8 @@ def handle_ask(audio_path, language_label, convo_mode: bool = False, history: li
1269
  return "⚠️ No audio — press Record or upload a file.", "", "", None, history
1270
 
1271
  language_code = SUPPORTED_LANGUAGES.get(language_label, "bam")
1272
- status = _ensure_whisper_loaded()
1273
-
1274
- if _whisper_model is None:
1275
- return f"⏳ Model loading ({status}). Wait a moment and try again.", "", "", None, history
1276
 
1277
  try:
1278
  if convo_mode:
@@ -1292,14 +1307,14 @@ def handle_ask(audio_path, language_label, convo_mode: bool = False, history: li
1292
  def _do_asr(audio_path: str, language_label: str) -> str:
1293
  """
1294
  Stage 1 — Whisper only. Returns the transcript string (or error/status).
1295
- Completes in ~3-8s on cpu-basic so the user sees what was heard immediately.
 
1296
  """
1297
  if audio_path is None:
1298
  return "⚠️ No audio — press Record or upload a file."
1299
  lang = SUPPORTED_LANGUAGES.get(language_label, "bam")
1300
- status = _ensure_whisper_loaded()
1301
- if _whisper_model is None:
1302
- return f"⏳ Model loading ({status}). Wait a moment and try again."
1303
  try:
1304
  import torch, librosa
1305
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
141
  return _model_status
142
 
143
 
144
def _wait_for_whisper(timeout: int = 120) -> bool:
    """
    Block until the Whisper model is loaded or *timeout* seconds expire.

    Triggers loading via _ensure_whisper_loaded() (which is non-blocking —
    it spawns a background thread and returns immediately) if it has not
    already started, then polls the module-level _whisper_model every 0.5 s.

    Parameters
    ----------
    timeout : int
        Maximum number of seconds to wait before giving up.

    Returns
    -------
    bool
        True once _whisper_model is ready; False if the loader reported an
        error or the deadline passed without the model becoming available.
    """
    import time

    # Kick off the background load if nobody has yet; harmless if already
    # running or finished.
    _ensure_whisper_loaded()

    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if _whisper_model is not None:
            return True
        # NOTE(review): _model_status is set by the loader thread and may
        # plausibly still be None on a fresh interpreter — `or ""` keeps the
        # membership test from raising TypeError in that window.
        if "error" in (_model_status or ""):
            return False
        time.sleep(0.5)
    return False
161
  def get_model_status() -> str:
162
  s = _ensure_whisper_loaded()
163
  if "ready" in s:
 
1286
  return "⚠️ No audio — press Record or upload a file.", "", "", None, history
1287
 
1288
  language_code = SUPPORTED_LANGUAGES.get(language_label, "bam")
1289
+ if not _wait_for_whisper(timeout=120):
1290
+ return f"❌ Model failed to load: {_model_status}", "", "", None, history
 
 
1291
 
1292
  try:
1293
  if convo_mode:
 
1307
  def _do_asr(audio_path: str, language_label: str) -> str:
1308
  """
1309
  Stage 1 — Whisper only. Returns the transcript string (or error/status).
1310
+ Blocks until the model is ready (up to 120 s) so the first request after a
1311
+ cold start works without the user needing to retry.
1312
  """
1313
  if audio_path is None:
1314
  return "⚠️ No audio — press Record or upload a file."
1315
  lang = SUPPORTED_LANGUAGES.get(language_label, "bam")
1316
+ if not _wait_for_whisper(timeout=120):
1317
+ return f"❌ Model failed to load: {_model_status}"
 
1318
  try:
1319
  import torch, librosa
1320
  device = "cuda" if torch.cuda.is_available() else "cpu"