Spaces:
Running
Running
Broulaye Doumbia committed on
Commit ·
618eab5
1
Parent(s): 71bb3bc
Fix model loading stuck forever + unhandled TTS crash in conversation mode
Browse files
_do_load_whisper had import statements outside the try/except block — if
torch or transformers imports failed the thread crashed silently, leaving
_model_status permanently at "loading…" with no retry. Wrap the entire
function body so any exception is captured. Also add a 180s stuck-load
timeout in _ensure_whisper_loaded so the timer can recover and retry.
Wrap _tts.synthesize() in try/except in both conversation and sensor
modes so a TTS failure returns the text response instead of crashing
the whole handler.
Align .env.example with the variable names app.py actually reads
(WHISPER_MODEL_ID, LLM_MODEL_ID) and re-read LLM_MODEL_ID after
load_dotenv() in __main__.
Made-with: Cursor
- .env.example +5 -2
- app.py +39 -16
.env.example
CHANGED
|
@@ -1,8 +1,11 @@
|
|
| 1 |
# HuggingFace read token (required for accessing google/waxal dataset)
|
| 2 |
HF_TOKEN=hf_your_token_here
|
| 3 |
|
| 4 |
-
#
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# Adapter paths (relative to project root)
|
| 8 |
BAMBARA_ADAPTER_PATH=./adapters/bambara
|
|
|
|
| 1 |
# HuggingFace read token (required for accessing google/waxal dataset)
|
| 2 |
HF_TOKEN=hf_your_token_here
|
| 3 |
|
| 4 |
+
# Whisper model (used by app.py Gradio UI)
|
| 5 |
+
WHISPER_MODEL_ID=openai/whisper-large-v3-turbo
|
| 6 |
+
|
| 7 |
+
# LLM for conversation mode (HF Inference API)
|
| 8 |
+
LLM_MODEL_ID=Qwen/Qwen2.5-7B-Instruct
|
| 9 |
|
| 10 |
# Adapter paths (relative to project root)
|
| 11 |
BAMBARA_ADAPTER_PATH=./adapters/bambara
|
app.py
CHANGED
|
@@ -67,6 +67,8 @@ _whisper_processor = None
|
|
| 67 |
_fine_tuned_models = {} # lang_code -> WhisperForConditionalGeneration (full checkpoint)
|
| 68 |
_model_lock = threading.Lock()
|
| 69 |
_model_status = "not loaded"
|
|
|
|
|
|
|
| 70 |
|
| 71 |
# ── Conversation-mode state ───────────────────────────────────────────────────
|
| 72 |
_voice_ref_path: str | None = None # path to 24 kHz WAV converted from user MP3
|
|
@@ -97,18 +99,16 @@ if HF_TOKEN:
|
|
| 97 |
|
| 98 |
def _do_load_whisper():
|
| 99 |
global _whisper_model, _whisper_processor, _model_status
|
| 100 |
-
import torch
|
| 101 |
-
|
| 102 |
-
# Import concrete Whisper classes directly — bypasses transformers __init__.py
|
| 103 |
-
# Auto-class exports differ between transformers 4.x and 5.x; direct paths are stable.
|
| 104 |
try:
|
| 105 |
-
|
| 106 |
-
except ImportError:
|
| 107 |
-
from transformers.models.whisper.processing_whisper import WhisperProcessor
|
| 108 |
-
from transformers.models.whisper.modeling_whisper import WhisperForConditionalGeneration
|
| 109 |
|
| 110 |
-
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
_whisper_processor = WhisperProcessor.from_pretrained(
|
| 113 |
WHISPER_MODEL_ID, token=HF_TOKEN
|
| 114 |
)
|
|
@@ -131,11 +131,22 @@ def _do_load_whisper():
|
|
| 131 |
|
| 132 |
def _ensure_whisper_loaded():
|
| 133 |
"""Load Whisper to CPU in a background thread on first call. Non-blocking."""
|
| 134 |
-
|
|
|
|
| 135 |
with _model_lock:
|
| 136 |
-
|
| 137 |
if _whisper_model is None and "loading" not in _model_status:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
_model_status = "loading…"
|
|
|
|
| 139 |
t = threading.Thread(target=_do_load_whisper, daemon=True)
|
| 140 |
t.start()
|
| 141 |
return _model_status
|
|
@@ -1410,8 +1421,12 @@ def _do_respond(
|
|
| 1410 |
except Exception:
|
| 1411 |
pass
|
| 1412 |
if audio_out is None:
|
| 1413 |
-
|
| 1414 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1415 |
|
| 1416 |
return "", response_text, audio_out, new_history, chat_msgs
|
| 1417 |
|
|
@@ -1441,9 +1456,15 @@ def _do_respond(
|
|
| 1441 |
elif lang == "ful":
|
| 1442 |
response_text, english_translation = FULA_TEMPLATES["not_understood"]
|
| 1443 |
|
| 1444 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1445 |
chat_msgs = [[u, v] for u, v in history]
|
| 1446 |
-
return english_translation, response_text,
|
| 1447 |
|
| 1448 |
|
| 1449 |
# ── Gradio UI ─────────────────────────────────────────────────────────────────
|
|
@@ -1927,6 +1948,7 @@ if __name__ == "__main__":
|
|
| 1927 |
FEEDBACK_REPO_ID = os.environ.get("FEEDBACK_REPO_ID", "ous-sow/sahel-agri-feedback")
|
| 1928 |
ADAPTER_REPO_ID = os.environ.get("ADAPTER_REPO_ID", "ous-sow/sahel-agri-adapters")
|
| 1929 |
WHISPER_MODEL_ID = os.environ.get("WHISPER_MODEL_ID", "openai/whisper-small")
|
|
|
|
| 1930 |
|
| 1931 |
if HF_TOKEN:
|
| 1932 |
from huggingface_hub import HfApi
|
|
@@ -1939,6 +1961,7 @@ if __name__ == "__main__":
|
|
| 1939 |
_ensure_whisper_loaded()
|
| 1940 |
|
| 1941 |
print(f"Whisper model : {WHISPER_MODEL_ID}")
|
|
|
|
| 1942 |
print(f"Feedback repo : {FEEDBACK_REPO_ID}")
|
| 1943 |
print(f"Adapter repo : {ADAPTER_REPO_ID}")
|
| 1944 |
print(f"HF_TOKEN set : {'yes' if HF_TOKEN else 'no (local-only mode)'}")
|
|
|
|
| 67 |
_fine_tuned_models = {} # lang_code -> WhisperForConditionalGeneration (full checkpoint)
|
| 68 |
_model_lock = threading.Lock()
|
| 69 |
_model_status = "not loaded"
|
| 70 |
+
_load_started_at: float = 0.0 # monotonic time when loading began
|
| 71 |
+
_LOAD_TIMEOUT = 180 # seconds before declaring a stuck load
|
| 72 |
|
| 73 |
# ── Conversation-mode state ───────────────────────────────────────────────────
|
| 74 |
_voice_ref_path: str | None = None # path to 24 kHz WAV converted from user MP3
|
|
|
|
| 99 |
|
| 100 |
def _do_load_whisper():
|
| 101 |
global _whisper_model, _whisper_processor, _model_status
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
try:
|
| 103 |
+
import torch
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
+
try:
|
| 106 |
+
from transformers.models.whisper import WhisperProcessor, WhisperForConditionalGeneration
|
| 107 |
+
except ImportError:
|
| 108 |
+
from transformers.models.whisper.processing_whisper import WhisperProcessor
|
| 109 |
+
from transformers.models.whisper.modeling_whisper import WhisperForConditionalGeneration
|
| 110 |
+
|
| 111 |
+
_model_status = "loading…"
|
| 112 |
_whisper_processor = WhisperProcessor.from_pretrained(
|
| 113 |
WHISPER_MODEL_ID, token=HF_TOKEN
|
| 114 |
)
|
|
|
|
| 131 |
|
| 132 |
def _ensure_whisper_loaded():
|
| 133 |
"""Load Whisper to CPU in a background thread on first call. Non-blocking."""
|
| 134 |
+
import time
|
| 135 |
+
global _model_status, _load_started_at
|
| 136 |
with _model_lock:
|
| 137 |
+
need_start = False
|
| 138 |
if _whisper_model is None and "loading" not in _model_status:
|
| 139 |
+
need_start = True
|
| 140 |
+
elif (_whisper_model is None
|
| 141 |
+
and "loading" in _model_status
|
| 142 |
+
and _load_started_at
|
| 143 |
+
and (time.monotonic() - _load_started_at) > _LOAD_TIMEOUT):
|
| 144 |
+
_model_status = "error: load timed out after %ds — retrying" % _LOAD_TIMEOUT
|
| 145 |
+
need_start = True
|
| 146 |
+
|
| 147 |
+
if need_start:
|
| 148 |
_model_status = "loading…"
|
| 149 |
+
_load_started_at = time.monotonic()
|
| 150 |
t = threading.Thread(target=_do_load_whisper, daemon=True)
|
| 151 |
t.start()
|
| 152 |
return _model_status
|
|
|
|
| 1421 |
except Exception:
|
| 1422 |
pass
|
| 1423 |
if audio_out is None:
|
| 1424 |
+
try:
|
| 1425 |
+
wav_np, sr = _tts.synthesize(response_text, lang, device=device)
|
| 1426 |
+
audio_out = (sr, wav_np)
|
| 1427 |
+
except Exception as tts_err:
|
| 1428 |
+
import logging
|
| 1429 |
+
logging.getLogger(__name__).warning("TTS error: %s", tts_err)
|
| 1430 |
|
| 1431 |
return "", response_text, audio_out, new_history, chat_msgs
|
| 1432 |
|
|
|
|
| 1456 |
elif lang == "ful":
|
| 1457 |
response_text, english_translation = FULA_TEMPLATES["not_understood"]
|
| 1458 |
|
| 1459 |
+
audio_out = None
|
| 1460 |
+
try:
|
| 1461 |
+
wav_np, sr = _tts.synthesize(response_text, lang, device=device)
|
| 1462 |
+
audio_out = (sr, wav_np)
|
| 1463 |
+
except Exception as tts_err:
|
| 1464 |
+
import logging
|
| 1465 |
+
logging.getLogger(__name__).warning("TTS error: %s", tts_err)
|
| 1466 |
chat_msgs = [[u, v] for u, v in history]
|
| 1467 |
+
return english_translation, response_text, audio_out, history, chat_msgs
|
| 1468 |
|
| 1469 |
|
| 1470 |
# ── Gradio UI ─────────────────────────────────────────────────────────────────
|
|
|
|
| 1948 |
FEEDBACK_REPO_ID = os.environ.get("FEEDBACK_REPO_ID", "ous-sow/sahel-agri-feedback")
|
| 1949 |
ADAPTER_REPO_ID = os.environ.get("ADAPTER_REPO_ID", "ous-sow/sahel-agri-adapters")
|
| 1950 |
WHISPER_MODEL_ID = os.environ.get("WHISPER_MODEL_ID", "openai/whisper-small")
|
| 1951 |
+
LLM_MODEL_ID = os.environ.get("LLM_MODEL_ID", "Qwen/Qwen2.5-7B-Instruct")
|
| 1952 |
|
| 1953 |
if HF_TOKEN:
|
| 1954 |
from huggingface_hub import HfApi
|
|
|
|
| 1961 |
_ensure_whisper_loaded()
|
| 1962 |
|
| 1963 |
print(f"Whisper model : {WHISPER_MODEL_ID}")
|
| 1964 |
+
print(f"LLM model : {LLM_MODEL_ID}")
|
| 1965 |
print(f"Feedback repo : {FEEDBACK_REPO_ID}")
|
| 1966 |
print(f"Adapter repo : {ADAPTER_REPO_ID}")
|
| 1967 |
print(f"HF_TOKEN set : {'yes' if HF_TOKEN else 'no (local-only mode)'}")
|