jefffffff9 Claude Sonnet 4.6 committed on
Commit
1958814
·
1 Parent(s): 8d7d9d8

Fix 'Model loading' stuck state: block in _do_asr until Whisper is ready

Browse files

_ensure_whisper_loaded() fires a background thread and returns immediately.
_do_asr checked _whisper_model is None and returned a loading message —
but nothing ever re-triggered the pipeline after the model finished loading,
so the UI stayed stuck on that message forever.

Fix: _wait_for_whisper(timeout=120) polls _whisper_model every 0.5s until
ready or timeout. _do_asr and handle_ask now call this instead of the
non-blocking _ensure_whisper_loaded(). On a cold start the first request
waits (showing a spinner in the Gradio component) and then runs normally
— no retry needed by the user.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +23 -8
app.py CHANGED
@@ -141,6 +141,23 @@ def _ensure_whisper_loaded():
141
  return _model_status
142
 
143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  def get_model_status() -> str:
145
  s = _ensure_whisper_loaded()
146
  if "ready" in s:
@@ -1269,10 +1286,8 @@ def handle_ask(audio_path, language_label, convo_mode: bool = False, history: li
1269
  return "⚠️ No audio — press Record or upload a file.", "", "", None, history
1270
 
1271
  language_code = SUPPORTED_LANGUAGES.get(language_label, "bam")
1272
- status = _ensure_whisper_loaded()
1273
-
1274
- if _whisper_model is None:
1275
- return f"⏳ Model loading ({status}). Wait a moment and try again.", "", "", None, history
1276
 
1277
  try:
1278
  if convo_mode:
@@ -1292,14 +1307,14 @@ def handle_ask(audio_path, language_label, convo_mode: bool = False, history: li
1292
  def _do_asr(audio_path: str, language_label: str) -> str:
1293
  """
1294
  Stage 1 — Whisper only. Returns the transcript string (or error/status).
1295
- Completes in ~3-8s on cpu-basic so the user sees what was heard immediately.
 
1296
  """
1297
  if audio_path is None:
1298
  return "⚠️ No audio — press Record or upload a file."
1299
  lang = SUPPORTED_LANGUAGES.get(language_label, "bam")
1300
- status = _ensure_whisper_loaded()
1301
- if _whisper_model is None:
1302
- return f"⏳ Model loading ({status}). Wait a moment and try again."
1303
  try:
1304
  import torch, librosa
1305
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
141
  return _model_status
142
 
143
 
144
def _wait_for_whisper(timeout: int = 120) -> bool:
    """
    Block until the Whisper model is loaded or *timeout* seconds expire.

    Triggers loading via _ensure_whisper_loaded() (which is non-blocking —
    it spawns a background thread and returns immediately) if it has not
    already started, then polls the module-level _whisper_model every 0.5 s.

    Parameters
    ----------
    timeout : int
        Maximum number of seconds to wait before giving up.

    Returns
    -------
    bool
        True once _whisper_model is ready; False if the loader reported an
        error or the deadline passed without the model becoming available.
    """
    import time

    # Kick off the background load if nobody has yet; harmless if already
    # running or finished.
    _ensure_whisper_loaded()

    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if _whisper_model is not None:
            return True
        # NOTE(review): _model_status is set by the loader thread and may
        # plausibly still be None on a fresh interpreter — `or ""` keeps the
        # membership test from raising TypeError in that window.
        if "error" in (_model_status or ""):
            return False
        time.sleep(0.5)
    return False
161
  def get_model_status() -> str:
162
  s = _ensure_whisper_loaded()
163
  if "ready" in s:
 
1286
  return "⚠️ No audio — press Record or upload a file.", "", "", None, history
1287
 
1288
  language_code = SUPPORTED_LANGUAGES.get(language_label, "bam")
1289
+ if not _wait_for_whisper(timeout=120):
1290
+ return f"❌ Model failed to load: {_model_status}", "", "", None, history
 
 
1291
 
1292
  try:
1293
  if convo_mode:
 
1307
  def _do_asr(audio_path: str, language_label: str) -> str:
1308
  """
1309
  Stage 1 — Whisper only. Returns the transcript string (or error/status).
1310
+ Blocks until the model is ready (up to 120 s) so the first request after a
1311
+ cold start works without the user needing to retry.
1312
  """
1313
  if audio_path is None:
1314
  return "⚠️ No audio — press Record or upload a file."
1315
  lang = SUPPORTED_LANGUAGES.get(language_label, "bam")
1316
+ if not _wait_for_whisper(timeout=120):
1317
+ return f"❌ Model failed to load: {_model_status}"
 
1318
  try:
1319
  import torch, librosa
1320
  device = "cuda" if torch.cuda.is_available() else "cpu"