Spaces:
Running
Running
Broulaye Doumbia committed on
Commit ·
618eab5
1
Parent(s): 71bb3bc
Fix model loading stuck forever + unhandled TTS crash in conversation mode
Browse files
_do_load_whisper had import statements outside the try/except block — if
torch or transformers imports failed the thread crashed silently, leaving
_model_status permanently at "loading…" with no retry. Wrap the entire
function body so any exception is captured. Also add a 180s stuck-load
timeout in _ensure_whisper_loaded so the timer can recover and retry.
Wrap _tts.synthesize() in try/except in both conversation and sensor
modes so a TTS failure returns the text response instead of crashing
the whole handler.
Align .env.example with the variable names app.py actually reads
(WHISPER_MODEL_ID, LLM_MODEL_ID) and re-read LLM_MODEL_ID after
load_dotenv() in __main__.
Made-with: Cursor
- .env.example +5 -2
- app.py +39 -16
.env.example
CHANGED
|
@@ -1,8 +1,11 @@
|
|
| 1 |
# HuggingFace read token (required for accessing google/waxal dataset)
|
| 2 |
HF_TOKEN=hf_your_token_here
|
| 3 |
|
| 4 |
-
#
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# Adapter paths (relative to project root)
|
| 8 |
BAMBARA_ADAPTER_PATH=./adapters/bambara
|
|
|
|
| 1 |
# HuggingFace read token (required for accessing google/waxal dataset)
|
| 2 |
HF_TOKEN=hf_your_token_here
|
| 3 |
|
| 4 |
+
# Whisper model (used by app.py Gradio UI)
|
| 5 |
+
WHISPER_MODEL_ID=openai/whisper-large-v3-turbo
|
| 6 |
+
|
| 7 |
+
# LLM for conversation mode (HF Inference API)
|
| 8 |
+
LLM_MODEL_ID=Qwen/Qwen2.5-7B-Instruct
|
| 9 |
|
| 10 |
# Adapter paths (relative to project root)
|
| 11 |
BAMBARA_ADAPTER_PATH=./adapters/bambara
|
app.py
CHANGED
|
@@ -67,6 +67,8 @@ _whisper_processor = None
|
|
| 67 |
_fine_tuned_models = {} # lang_code -> WhisperForConditionalGeneration (full checkpoint)
|
| 68 |
_model_lock = threading.Lock()
|
| 69 |
_model_status = "not loaded"
|
|
|
|
|
|
|
| 70 |
|
| 71 |
# ── Conversation-mode state ───────────────────────────────────────────────────
|
| 72 |
_voice_ref_path: str | None = None # path to 24 kHz WAV converted from user MP3
|
|
@@ -97,18 +99,16 @@ if HF_TOKEN:
|
|
| 97 |
|
| 98 |
def _do_load_whisper():
|
| 99 |
global _whisper_model, _whisper_processor, _model_status
|
| 100 |
-
import torch
|
| 101 |
-
|
| 102 |
-
# Import concrete Whisper classes directly — bypasses transformers __init__.py
|
| 103 |
-
# Auto-class exports differ between transformers 4.x and 5.x; direct paths are stable.
|
| 104 |
try:
|
| 105 |
-
|
| 106 |
-
except ImportError:
|
| 107 |
-
from transformers.models.whisper.processing_whisper import WhisperProcessor
|
| 108 |
-
from transformers.models.whisper.modeling_whisper import WhisperForConditionalGeneration
|
| 109 |
|
| 110 |
-
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
_whisper_processor = WhisperProcessor.from_pretrained(
|
| 113 |
WHISPER_MODEL_ID, token=HF_TOKEN
|
| 114 |
)
|
|
@@ -131,11 +131,22 @@ def _do_load_whisper():
|
|
| 131 |
|
| 132 |
def _ensure_whisper_loaded():
|
| 133 |
"""Load Whisper to CPU in a background thread on first call. Non-blocking."""
|
| 134 |
-
|
|
|
|
| 135 |
with _model_lock:
|
| 136 |
-
|
| 137 |
if _whisper_model is None and "loading" not in _model_status:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
_model_status = "loading…"
|
|
|
|
| 139 |
t = threading.Thread(target=_do_load_whisper, daemon=True)
|
| 140 |
t.start()
|
| 141 |
return _model_status
|
|
@@ -1410,8 +1421,12 @@ def _do_respond(
|
|
| 1410 |
except Exception:
|
| 1411 |
pass
|
| 1412 |
if audio_out is None:
|
| 1413 |
-
|
| 1414 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1415 |
|
| 1416 |
return "", response_text, audio_out, new_history, chat_msgs
|
| 1417 |
|
|
@@ -1441,9 +1456,15 @@ def _do_respond(
|
|
| 1441 |
elif lang == "ful":
|
| 1442 |
response_text, english_translation = FULA_TEMPLATES["not_understood"]
|
| 1443 |
|
| 1444 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1445 |
chat_msgs = [[u, v] for u, v in history]
|
| 1446 |
-
return english_translation, response_text,
|
| 1447 |
|
| 1448 |
|
| 1449 |
# ── Gradio UI ─────────────────────────────────────────────────────────────────
|
|
@@ -1927,6 +1948,7 @@ if __name__ == "__main__":
|
|
| 1927 |
FEEDBACK_REPO_ID = os.environ.get("FEEDBACK_REPO_ID", "ous-sow/sahel-agri-feedback")
|
| 1928 |
ADAPTER_REPO_ID = os.environ.get("ADAPTER_REPO_ID", "ous-sow/sahel-agri-adapters")
|
| 1929 |
WHISPER_MODEL_ID = os.environ.get("WHISPER_MODEL_ID", "openai/whisper-small")
|
|
|
|
| 1930 |
|
| 1931 |
if HF_TOKEN:
|
| 1932 |
from huggingface_hub import HfApi
|
|
@@ -1939,6 +1961,7 @@ if __name__ == "__main__":
|
|
| 1939 |
_ensure_whisper_loaded()
|
| 1940 |
|
| 1941 |
print(f"Whisper model : {WHISPER_MODEL_ID}")
|
|
|
|
| 1942 |
print(f"Feedback repo : {FEEDBACK_REPO_ID}")
|
| 1943 |
print(f"Adapter repo : {ADAPTER_REPO_ID}")
|
| 1944 |
print(f"HF_TOKEN set : {'yes' if HF_TOKEN else 'no (local-only mode)'}")
|
|
|
|
| 67 |
_fine_tuned_models = {} # lang_code -> WhisperForConditionalGeneration (full checkpoint)
|
| 68 |
_model_lock = threading.Lock()
|
| 69 |
_model_status = "not loaded"
|
| 70 |
+
_load_started_at: float = 0.0 # monotonic time when loading began
|
| 71 |
+
_LOAD_TIMEOUT = 180 # seconds before declaring a stuck load
|
| 72 |
|
| 73 |
# ── Conversation-mode state ───────────────────────────────────────────────────
|
| 74 |
_voice_ref_path: str | None = None # path to 24 kHz WAV converted from user MP3
|
|
|
|
| 99 |
|
| 100 |
def _do_load_whisper():
|
| 101 |
global _whisper_model, _whisper_processor, _model_status
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
try:
|
| 103 |
+
import torch
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
+
try:
|
| 106 |
+
from transformers.models.whisper import WhisperProcessor, WhisperForConditionalGeneration
|
| 107 |
+
except ImportError:
|
| 108 |
+
from transformers.models.whisper.processing_whisper import WhisperProcessor
|
| 109 |
+
from transformers.models.whisper.modeling_whisper import WhisperForConditionalGeneration
|
| 110 |
+
|
| 111 |
+
_model_status = "loading…"
|
| 112 |
_whisper_processor = WhisperProcessor.from_pretrained(
|
| 113 |
WHISPER_MODEL_ID, token=HF_TOKEN
|
| 114 |
)
|
|
|
|
| 131 |
|
| 132 |
def _ensure_whisper_loaded():
|
| 133 |
"""Load Whisper to CPU in a background thread on first call. Non-blocking."""
|
| 134 |
+
import time
|
| 135 |
+
global _model_status, _load_started_at
|
| 136 |
with _model_lock:
|
| 137 |
+
need_start = False
|
| 138 |
if _whisper_model is None and "loading" not in _model_status:
|
| 139 |
+
need_start = True
|
| 140 |
+
elif (_whisper_model is None
|
| 141 |
+
and "loading" in _model_status
|
| 142 |
+
and _load_started_at
|
| 143 |
+
and (time.monotonic() - _load_started_at) > _LOAD_TIMEOUT):
|
| 144 |
+
_model_status = "error: load timed out after %ds — retrying" % _LOAD_TIMEOUT
|
| 145 |
+
need_start = True
|
| 146 |
+
|
| 147 |
+
if need_start:
|
| 148 |
_model_status = "loading…"
|
| 149 |
+
_load_started_at = time.monotonic()
|
| 150 |
t = threading.Thread(target=_do_load_whisper, daemon=True)
|
| 151 |
t.start()
|
| 152 |
return _model_status
|
|
|
|
| 1421 |
except Exception:
|
| 1422 |
pass
|
| 1423 |
if audio_out is None:
|
| 1424 |
+
try:
|
| 1425 |
+
wav_np, sr = _tts.synthesize(response_text, lang, device=device)
|
| 1426 |
+
audio_out = (sr, wav_np)
|
| 1427 |
+
except Exception as tts_err:
|
| 1428 |
+
import logging
|
| 1429 |
+
logging.getLogger(__name__).warning("TTS error: %s", tts_err)
|
| 1430 |
|
| 1431 |
return "", response_text, audio_out, new_history, chat_msgs
|
| 1432 |
|
|
|
|
| 1456 |
elif lang == "ful":
|
| 1457 |
response_text, english_translation = FULA_TEMPLATES["not_understood"]
|
| 1458 |
|
| 1459 |
+
audio_out = None
|
| 1460 |
+
try:
|
| 1461 |
+
wav_np, sr = _tts.synthesize(response_text, lang, device=device)
|
| 1462 |
+
audio_out = (sr, wav_np)
|
| 1463 |
+
except Exception as tts_err:
|
| 1464 |
+
import logging
|
| 1465 |
+
logging.getLogger(__name__).warning("TTS error: %s", tts_err)
|
| 1466 |
chat_msgs = [[u, v] for u, v in history]
|
| 1467 |
+
return english_translation, response_text, audio_out, history, chat_msgs
|
| 1468 |
|
| 1469 |
|
| 1470 |
# ── Gradio UI ─────────────────────────────────────────────────────────────────
|
|
|
|
| 1948 |
FEEDBACK_REPO_ID = os.environ.get("FEEDBACK_REPO_ID", "ous-sow/sahel-agri-feedback")
|
| 1949 |
ADAPTER_REPO_ID = os.environ.get("ADAPTER_REPO_ID", "ous-sow/sahel-agri-adapters")
|
| 1950 |
WHISPER_MODEL_ID = os.environ.get("WHISPER_MODEL_ID", "openai/whisper-small")
|
| 1951 |
+
LLM_MODEL_ID = os.environ.get("LLM_MODEL_ID", "Qwen/Qwen2.5-7B-Instruct")
|
| 1952 |
|
| 1953 |
if HF_TOKEN:
|
| 1954 |
from huggingface_hub import HfApi
|
|
|
|
| 1961 |
_ensure_whisper_loaded()
|
| 1962 |
|
| 1963 |
print(f"Whisper model : {WHISPER_MODEL_ID}")
|
| 1964 |
+
print(f"LLM model : {LLM_MODEL_ID}")
|
| 1965 |
print(f"Feedback repo : {FEEDBACK_REPO_ID}")
|
| 1966 |
print(f"Adapter repo : {ADAPTER_REPO_ID}")
|
| 1967 |
print(f"HF_TOKEN set : {'yes' if HF_TOKEN else 'no (local-only mode)'}")
|