Broulaye Doumbia committed
Commit 618eab5 · 1 Parent(s): 71bb3bc

Fix model loading stuck forever + unhandled TTS crash in conversation mode


_do_load_whisper had import statements outside the try/except block: if
the torch or transformers imports failed, the thread crashed silently,
leaving _model_status permanently at "loading…" with no retry. Wrap the
entire function body in the try/except so any exception is captured. Also
add a 180s stuck-load timeout in _ensure_whisper_loaded so the polling
timer can recover and retry.
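
A minimal, self-contained sketch of that recovery pattern (a fake loader
and a shortened timeout, not the app's code; the real version also holds
_model_lock around the check):

import threading
import time

_status = "not loaded"
_started = 0.0
_TIMEOUT = 2  # the commit uses 180 s; shortened so the demo runs quickly

def _load(hang: bool) -> None:
    global _status
    try:
        if hang:
            time.sleep(10_000)  # stand-in for an import/download that never returns
        _status = "loaded"
    except Exception as e:  # mirror the commit: any failure lands in the status string
        _status = f"error: {e}"

def ensure_loaded(hang: bool = False) -> str:
    """Non-blocking: start a load if none is running, or restart one that is stuck."""
    global _status, _started
    need_start = False
    if _status != "loaded" and "loading" not in _status:
        need_start = True  # first attempt, or a previous attempt errored
    elif "loading" in _status and (time.monotonic() - _started) > _TIMEOUT:
        need_start = True  # stuck past the timeout: declare it dead and retry
    if need_start:
        _status, _started = "loading…", time.monotonic()
        threading.Thread(target=_load, args=(hang,), daemon=True).start()
    return _status

print(ensure_loaded(hang=True))   # this attempt hangs forever
time.sleep(2.5)
print(ensure_loaded(hang=False))  # timeout detected, a fresh attempt starts
time.sleep(0.5)
print(ensure_loaded())            # "loaded"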

Wrap _tts.synthesize() in try/except in both conversation and sensor
modes so a TTS failure returns the text response instead of crashing
the whole handler.
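
The fallback shape, sketched in isolation (tts_synthesize here is a
hypothetical stand-in for _tts.synthesize, and the failure is simulated):

import logging
import numpy as np

log = logging.getLogger(__name__)

def tts_synthesize(text: str) -> tuple[np.ndarray, int]:
    """Hypothetical stand-in for _tts.synthesize(); raises to simulate a crash."""
    raise RuntimeError("synthesis backend unavailable")

def respond(text: str):
    audio_out = None  # Gradio's Audio component accepts None and stays empty
    try:
        wav_np, sr = tts_synthesize(text)
        audio_out = (sr, wav_np)  # Gradio audio value: (sample_rate, ndarray)
    except Exception as err:  # broad on purpose: any TTS failure degrades to text-only
        log.warning("TTS error: %s", err)
    return text, audio_out

print(respond("I ni ce"))  # ('I ni ce', None): the text answer still reaches the user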

Align .env.example with the variable names app.py actually reads
(WHISPER_MODEL_ID, LLM_MODEL_ID) and re-read LLM_MODEL_ID after
load_dotenv() in __main__.
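
The ordering pitfall behind that re-read, sketched (assumes python-dotenv;
not the app's actual startup code):

import os
from dotenv import load_dotenv

# Evaluated at import time, before load_dotenv() runs: a value set only in
# .env is invisible here, so the hard-coded default silently wins.
LLM_MODEL_ID = os.environ.get("LLM_MODEL_ID", "Qwen/Qwen2.5-7B-Instruct")

if __name__ == "__main__":
    load_dotenv()  # .env values land in os.environ only now
    # Re-read so an LLM_MODEL_ID set in .env actually takes effect.
    LLM_MODEL_ID = os.environ.get("LLM_MODEL_ID", "Qwen/Qwen2.5-7B-Instruct")
    print(f"LLM model : {LLM_MODEL_ID}")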

Made-with: Cursor

Files changed (2)
  1. .env.example +5 -2
  2. app.py +39 -16
.env.example CHANGED
@@ -1,8 +1,11 @@
 # HuggingFace read token (required for accessing google/waxal dataset)
 HF_TOKEN=hf_your_token_here
 
-# Model
-MODEL_ID=openai/whisper-large-v3-turbo
+# Whisper model (used by app.py Gradio UI)
+WHISPER_MODEL_ID=openai/whisper-large-v3-turbo
+
+# LLM for conversation mode (HF Inference API)
+LLM_MODEL_ID=Qwen/Qwen2.5-7B-Instruct
 
 # Adapter paths (relative to project root)
 BAMBARA_ADAPTER_PATH=./adapters/bambara
app.py CHANGED
@@ -67,6 +67,8 @@ _whisper_processor = None
 _fine_tuned_models = {}  # lang_code -> WhisperForConditionalGeneration (full checkpoint)
 _model_lock = threading.Lock()
 _model_status = "not loaded"
+_load_started_at: float = 0.0  # monotonic time when loading began
+_LOAD_TIMEOUT = 180  # seconds before declaring a stuck load
 
 # ── Conversation-mode state ───────────────────────────────────────────────────
 _voice_ref_path: str | None = None  # path to 24 kHz WAV converted from user MP3
@@ -97,18 +99,16 @@ if HF_TOKEN:
 
 def _do_load_whisper():
     global _whisper_model, _whisper_processor, _model_status
-    import torch
-
-    # Import concrete Whisper classes directly — bypasses transformers __init__.py
-    # Auto-class exports differ between transformers 4.x and 5.x; direct paths are stable.
     try:
-        from transformers.models.whisper import WhisperProcessor, WhisperForConditionalGeneration
-    except ImportError:
-        from transformers.models.whisper.processing_whisper import WhisperProcessor
-        from transformers.models.whisper.modeling_whisper import WhisperForConditionalGeneration
+        import torch
 
-    _model_status = "loading…"
-    try:
+        try:
+            from transformers.models.whisper import WhisperProcessor, WhisperForConditionalGeneration
+        except ImportError:
+            from transformers.models.whisper.processing_whisper import WhisperProcessor
+            from transformers.models.whisper.modeling_whisper import WhisperForConditionalGeneration
+
+        _model_status = "loading…"
         _whisper_processor = WhisperProcessor.from_pretrained(
             WHISPER_MODEL_ID, token=HF_TOKEN
         )
@@ -131,11 +131,22 @@ def _do_load_whisper():
 
 def _ensure_whisper_loaded():
     """Load Whisper to CPU in a background thread on first call. Non-blocking."""
-    global _model_status
+    import time
+    global _model_status, _load_started_at
     with _model_lock:
-        # Retry if previous attempt errored (e.g. import failed on first try)
+        need_start = False
         if _whisper_model is None and "loading" not in _model_status:
+            need_start = True
+        elif (_whisper_model is None
+              and "loading" in _model_status
+              and _load_started_at
+              and (time.monotonic() - _load_started_at) > _LOAD_TIMEOUT):
+            _model_status = "error: load timed out after %ds — retrying" % _LOAD_TIMEOUT
+            need_start = True
+
+        if need_start:
             _model_status = "loading…"
+            _load_started_at = time.monotonic()
             t = threading.Thread(target=_do_load_whisper, daemon=True)
             t.start()
     return _model_status
@@ -1410,8 +1421,12 @@ def _do_respond(
         except Exception:
            pass
    if audio_out is None:
-        wav_np, sr = _tts.synthesize(response_text, lang, device=device)
-        audio_out = (sr, wav_np)
+        try:
+            wav_np, sr = _tts.synthesize(response_text, lang, device=device)
+            audio_out = (sr, wav_np)
+        except Exception as tts_err:
+            import logging
+            logging.getLogger(__name__).warning("TTS error: %s", tts_err)
 
    return "", response_text, audio_out, new_history, chat_msgs
 
@@ -1441,9 +1456,15 @@ def _do_respond(
    elif lang == "ful":
        response_text, english_translation = FULA_TEMPLATES["not_understood"]
 
-    wav_np, sr = _tts.synthesize(response_text, lang, device=device)
+    audio_out = None
+    try:
+        wav_np, sr = _tts.synthesize(response_text, lang, device=device)
+        audio_out = (sr, wav_np)
+    except Exception as tts_err:
+        import logging
+        logging.getLogger(__name__).warning("TTS error: %s", tts_err)
    chat_msgs = [[u, v] for u, v in history]
-    return english_translation, response_text, (sr, wav_np), history, chat_msgs
+    return english_translation, response_text, audio_out, history, chat_msgs
 
 
 # ── Gradio UI ─────────────────────────────────────────────────────────────────
@@ -1927,6 +1948,7 @@ if __name__ == "__main__":
    FEEDBACK_REPO_ID = os.environ.get("FEEDBACK_REPO_ID", "ous-sow/sahel-agri-feedback")
    ADAPTER_REPO_ID = os.environ.get("ADAPTER_REPO_ID", "ous-sow/sahel-agri-adapters")
    WHISPER_MODEL_ID = os.environ.get("WHISPER_MODEL_ID", "openai/whisper-small")
+   LLM_MODEL_ID = os.environ.get("LLM_MODEL_ID", "Qwen/Qwen2.5-7B-Instruct")
 
    if HF_TOKEN:
        from huggingface_hub import HfApi
@@ -1939,6 +1961,7 @@
    _ensure_whisper_loaded()
 
    print(f"Whisper model : {WHISPER_MODEL_ID}")
+   print(f"LLM model : {LLM_MODEL_ID}")
    print(f"Feedback repo : {FEEDBACK_REPO_ID}")
    print(f"Adapter repo : {ADAPTER_REPO_ID}")
    print(f"HF_TOKEN set : {'yes' if HF_TOKEN else 'no (local-only mode)'}")