Add confidence loop, curiosity engine, and lightweight TTS
Task 1 — TTS refactor (src/tts/waxal_tts.py):
Switch Bambara TTS from Qwen2-based MALIBA-AI to
ynnov/ekodi-bambara-tts-female (VitsModel + AutoTokenizer) — much
lighter on CPU Basic; no trust_remote_code needed. Fula is an
explicit generate_pular_tts() placeholder returning None until the
model is trained.
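
A minimal sketch of the new Bambara path (it mirrors the diff below; the sample
sentence is illustrative, not from this commit):

    from transformers import AutoTokenizer, VitsModel
    import torch

    tok = AutoTokenizer.from_pretrained("ynnov/ekodi-bambara-tts-female")
    mdl = VitsModel.from_pretrained("ynnov/ekodi-bambara-tts-female")
    mdl.eval()

    inputs = tok("I ni ce", return_tensors="pt")   # illustrative Bambara text
    with torch.no_grad():
        audio = mdl(**inputs).waveform[0].numpy()  # float32 waveform
    sr = mdl.config.sampling_rate                  # VITS checkpoints carry their own rate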
Task 2 — Active learning / confidence loop (src/engine/stt_processor.py):
transcribe_with_confidence() wraps Whisper generate() with
output_scores=True and computes avg_logprob via
compute_transition_scores(). If avg_logprob < -1.0, app_lab.py
replaces the transcript with CONFUSION_PROMPT so the LLM asks the
user in English to repeat and explain the word.
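
For intuition on the cutoff: avg_logprob is a mean of natural-log token
probabilities, so exponentiating it recovers the geometric-mean per-token
probability:

    import math

    math.exp(-1.0)   # ≈ 0.368 — below ~37% average token confidence, treat as confused
    math.exp(-0.5)   # ≈ 0.607 — closer to 0 means a more confident decode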
Task 3 — Proactive gaps (src/engine/curiosity.py):
CuriosityEngine.maybe_ask() fires every 5 interactions: it sends the
last 10 vocabulary entries to Qwen and appends a 🌱 question to the
chat asking the user to teach a missing agricultural term (cadence
sketch below).
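
Cadence sketch (assumes the _memory and _gemma singletons as wired in app_lab.py):

    eng = CuriosityEngine(interval=5)
    for i in range(1, 11):
        q = eng.maybe_ask(_memory, _gemma)
        # q is None except on calls 5 and 10, where it is the 🌱 question
        # (still None there if fewer than 3 words are saved or the LLM call fails)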
Task 4 — Zero-cost persistence:
MemoryManager._push_to_hub() was already async (background thread +
HfApi.upload_file). No changes needed.
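
For reference, the already-async pattern looks roughly like this (a sketch, not
the actual MemoryManager code; the attribute and file names are assumptions):

    import threading
    from huggingface_hub import HfApi

    def _push_to_hub(self) -> None:
        """Upload the vocabulary file without blocking the Gradio request."""
        def _upload():
            HfApi().upload_file(
                path_or_fileobj=self._local_path,   # assumed attribute
                path_in_repo="vocabulary.json",     # assumed file name
                repo_id=self._repo_id,              # assumed attribute
                repo_type="dataset",
                token=self._hf_token,               # assumed attribute
            )
        threading.Thread(target=_upload, daemon=True).start()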
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- app_lab.py +52 -34
- src/engine/curiosity.py +102 -0
- src/engine/stt_processor.py +88 -0
- src/tts/waxal_tts.py +32 -124
--- a/app_lab.py
+++ b/app_lab.py
@@ -46,13 +46,20 @@ LANGUAGE_NAMES = {
 }
 
 # ── Singletons ────────────────────────────────────────────────────────────────
-from src.memory.memory_manager import …
-from src.llm.gemma_client import …
-from src.tts.waxal_tts import …
-… (four removed lines truncated in the original render)
+from src.memory.memory_manager import MemoryManager
+from src.llm.gemma_client import GemmaClient
+from src.tts.waxal_tts import WaxalTTSEngine
+from src.engine.stt_processor import (
+    transcribe_with_confidence,
+    LOW_CONFIDENCE_THRESHOLD,
+    CONFUSION_PROMPT,
+)
+from src.engine.curiosity import CuriosityEngine
+
+_memory = MemoryManager(repo_id=FEEDBACK_REPO_ID, hf_token=HF_TOKEN)
+_gemma = GemmaClient(model_id=LLM_MODEL_ID, hf_token=HF_TOKEN)
+_tts = WaxalTTSEngine()
+_curiosity = CuriosityEngine(interval=5)
 
 # Whisper — loaded lazily in background
 _whisper_model = None
@@ -103,38 +110,36 @@ def _whisper_status_label() -> str:
     return f"⚪ STT {s}"
 
 
-def _transcribe(audio_path: str, language_hint: str) -> str:
-    """…"""
+def _transcribe(audio_path: str, language_hint: str) -> tuple[str, float]:
+    """
+    Run Whisper STT with confidence scoring.
+    Returns (text, avg_logprob). avg_logprob < LOW_CONFIDENCE_THRESHOLD → confused.
+    """
     if _whisper_model is None:
-        return ""
-    import …
+        return "", 0.0
+    import librosa
     audio_np, _ = librosa.load(audio_path, sr=16_000, mono=True)
 
+    # Whisper has no Bambara/Fula tokens — skip forced language for those
+    if language_hint in ("bam", "ful"):
+        forced_ids = None
+    else:
+        try:
+            forced_ids = _whisper_processor.get_decoder_prompt_ids(
+                language=language_hint, task="transcribe"
+            )
+        except Exception:
+            forced_ids = None
+
     with _whisper_lock:
-        inputs = _whisper_processor(
-            audio_np,
-            …
-        )
-    input_features = inputs.input_features
-
-    if language_hint in ("bam", "ful"):
-        forced_ids = None
-    else:
-        try:
-            forced_ids = _whisper_processor.get_decoder_prompt_ids(
-                language=language_hint, task="transcribe"
-            )
-        except Exception:
-            forced_ids = None
-
-    with torch.no_grad():
-        predicted_ids = _whisper_model.generate(
-            input_features,
-            forced_decoder_ids=forced_ids,
-            max_new_tokens=256,
-        )
-
-    return _whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0].strip()
+        text, avg_logprob = transcribe_with_confidence(
+            audio_np,
+            _whisper_model,
+            _whisper_processor,
+            forced_ids,
+        )
+
+    return text, avg_logprob
 
 
 # ── Core pipeline ─────────────────────────────────────────────────────────────
@@ -175,6 +180,11 @@ def _run_llm_and_tts(
     history.append({"role": "user", "content": f"[{LANGUAGE_NAMES.get(lang_code, lang_code)}] {transcript}"})
     history.append({"role": "assistant", "content": response})
 
+    # 5. Curiosity check — every 5 interactions, ask about a vocabulary gap
+    curiosity_q = _curiosity.maybe_ask(_memory, _gemma)
+    if curiosity_q:
+        history.append({"role": "assistant", "content": f"🌱 {curiosity_q}"})
+
     tts_status = "" if audio_out else " (TTS not available for this language yet)"
     status_msg = {
         "teaching": f"✅ Word learned and saved!{tts_status}",
@@ -201,10 +211,18 @@ def process_audio(audio_path, language_label: str, history: list) -> tuple:
         if _whisper_model is None:
             return history, _render_recent_words(), f"⏳ {status} — wait a moment and try again.", None
 
-        transcript = _transcribe(audio_path, lang_code)
+        transcript, avg_logprob = _transcribe(audio_path, lang_code)
        if not transcript:
             return history, _render_recent_words(), "⚠️ Could not transcribe audio.", None
 
+        # Low-confidence transcription → ask user to repeat and explain
+        if avg_logprob < LOW_CONFIDENCE_THRESHOLD:
+            logger.info(
+                "Low STT confidence (avg_logprob=%.3f) — switching to confusion prompt",
+                avg_logprob,
+            )
+            transcript = CONFUSION_PROMPT
+
         return _run_llm_and_tts(transcript, lang_code, history, "voice")
     except Exception as exc:
         logger.exception("process_audio error")
--- /dev/null
+++ b/src/engine/curiosity.py
@@ -0,0 +1,102 @@
+"""
+CuriosityEngine — proactive vocabulary gap analysis.
+
+Every N interactions (default: 5), sends the last 10 vocabulary entries to
+the LLM and asks it to identify one related agricultural / everyday term that
+is missing from the learner's vocabulary, then formulate a question asking the
+user how to say that word in their language.
+
+Usage in app_lab.py:
+    _curiosity = CuriosityEngine(interval=5)
+
+    # Inside _run_llm_and_tts, after the main LLM call:
+    question = _curiosity.maybe_ask(_memory, _gemma)
+    if question:
+        history.append({"role": "assistant", "content": f"🌱 {question}"})
+"""
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING, Optional
+
+if TYPE_CHECKING:
+    from src.memory.memory_manager import MemoryManager
+    from src.llm.gemma_client import GemmaClient
+
+logger = logging.getLogger(__name__)
+
+_CURIOSITY_SYSTEM = """\
+You are a language-learning assistant that notices gaps in a West African vocabulary list.
+Reply with a single valid JSON object and nothing else.\
+"""
+
+_CURIOSITY_USER_TEMPLATE = """\
+Here are the {n} most recent words I have learned so far:
+{vocab_list}
+
+Based on these words, what is ONE related agricultural or common everyday term \
+I am likely missing? Formulate a short, warm question asking the user how to say \
+that missing word in their language.
+
+Reply only with this JSON:
+{{
+  "word_suggestion": "<the English word you think is missing>",
+  "question": "<one friendly sentence asking the user>"
+}}
+"""
+
+
+class CuriosityEngine:
+    """Fires a vocabulary-gap prompt every `interval` user interactions."""
+
+    def __init__(self, interval: int = 5) -> None:
+        self._interval = interval
+        self._interaction = 0
+
+    def maybe_ask(
+        self,
+        memory: "MemoryManager",
+        gemma: "GemmaClient",
+    ) -> Optional[str]:
+        """
+        Increment the interaction counter. On every `interval`-th call, query
+        the LLM for a missing vocabulary term and return the question string.
+        Returns None on all other calls, or if vocabulary is too sparse, or if
+        the LLM call fails.
+        """
+        self._interaction += 1
+        if self._interaction % self._interval != 0:
+            return None
+
+        entries = memory.get_all()
+        if len(entries) < 3:
+            logger.debug("CuriosityEngine: vocabulary too sparse (%d entries)", len(entries))
+            return None
+
+        recent = entries[-10:]
+        lines = [
+            f"  [{e.get('language','?')}] {e.get('word','')} = {e.get('translation','')}"
+            for e in recent
+        ]
+        prompt = _CURIOSITY_USER_TEMPLATE.format(
+            n=len(lines),
+            vocab_list="\n".join(lines),
+        )
+
+        try:
+            # Pass the curiosity prompt as user text; empty vocab context to avoid
+            # duplicating the word list inside the system prompt.
+            result = gemma.chat(prompt, vocabulary_context="(see above)")
+            question = result.get("question") or result.get("response")
+            if question:
+                word = result.get("word_suggestion", "")
+                logger.info(
+                    "CuriosityEngine: suggesting '%s' — %s",
+                    word,
+                    question[:80],
+                )
+                return question.strip()
+        except Exception as exc:
+            logger.warning("CuriosityEngine: LLM call failed: %s", exc)
+
+        return None
--- /dev/null
+++ b/src/engine/stt_processor.py
@@ -0,0 +1,88 @@
+"""
+STT confidence extractor.
+
+Wraps Whisper's generate() with return_dict_in_generate=True to compute
+avg_logprob — the mean log-probability over generated tokens. This mirrors
+the avg_logprob field returned by the OpenAI Whisper API.
+
+Threshold: avg_logprob < -1.0 signals a low-confidence transcription where
+the model was essentially guessing. The caller should treat this as "confused"
+and prompt the user to repeat and explain the word.
+"""
+from __future__ import annotations
+
+import logging
+
+import numpy as np
+import torch
+
+logger = logging.getLogger(__name__)
+
+# Anything below this is considered "confused" transcription
+LOW_CONFIDENCE_THRESHOLD: float = -1.0
+
+# Message substituted for the transcript when confidence is low
+CONFUSION_PROMPT: str = (
+    "The user spoke, but I am confused. "
+    "Ask the user in English to repeat the local word and explain its meaning."
+)
+
+
+def transcribe_with_confidence(
+    audio_np: np.ndarray,
+    model,
+    processor,
+    forced_ids,
+    max_new_tokens: int = 256,
+) -> tuple[str, float]:
+    """
+    Run Whisper and return (text, avg_logprob).
+
+    avg_logprob is in (-inf, 0]. A value close to 0 means high confidence.
+    Returns avg_logprob = 0.0 if computation fails (treated as confident).
+
+    Args:
+        audio_np: float32 audio at 16 kHz.
+        model: WhisperForConditionalGeneration instance.
+        processor: WhisperProcessor instance.
+        forced_ids: Output of get_decoder_prompt_ids() or None.
+        max_new_tokens: Maximum tokens to generate.
+    """
+    inputs = processor.feature_extractor(
+        audio_np, sampling_rate=16_000, return_tensors="pt"
+    )
+    input_features = inputs.input_features
+
+    with torch.no_grad():
+        output = model.generate(
+            input_features,
+            forced_decoder_ids=forced_ids,
+            max_new_tokens=max_new_tokens,
+            return_dict_in_generate=True,
+            output_scores=True,
+        )
+
+    text = processor.batch_decode(output.sequences, skip_special_tokens=True)[0].strip()
+
+    # Compute avg log-prob via model.compute_transition_scores
+    avg_logprob = 0.0
+    try:
+        transition_scores = model.compute_transition_scores(
+            output.sequences,
+            output.scores,
+            normalize_logits=True,
+        )
+        # Shape: (batch, generated_len). Take batch[0], skip zero-padded positions.
+        scores = transition_scores[0]
+        valid = scores[scores != 0]
+        if valid.numel() > 0:
+            avg_logprob = valid.mean().item()
+    except Exception as exc:
+        logger.debug("avg_logprob computation failed: %s", exc)
+
+    logger.debug(
+        "STT confidence: avg_logprob=%.3f text=%r",
+        avg_logprob,
+        text[:60],
+    )
+    return text, avg_logprob
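
A quick way to exercise the new module on its own (the checkpoint and audio
file here are placeholders, not part of this commit):

    import librosa
    from transformers import WhisperForConditionalGeneration, WhisperProcessor
    from src.engine.stt_processor import (
        transcribe_with_confidence,
        LOW_CONFIDENCE_THRESHOLD,
    )

    model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
    processor = WhisperProcessor.from_pretrained("openai/whisper-small")
    audio, _ = librosa.load("sample.wav", sr=16_000, mono=True)

    text, avg_logprob = transcribe_with_confidence(audio, model, processor, forced_ids=None)
    print(text, avg_logprob, avg_logprob < LOW_CONFIDENCE_THRESHOLD)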
--- a/src/tts/waxal_tts.py
+++ b/src/tts/waxal_tts.py
@@ -1,23 +1,13 @@
 """
-WaxalTTSEngine — …
-
-Bambara        : … (…
-                 HF Spaces blocks outbound GitHub connections)
-Fula           : ous-sow/fula-tts (VITS, trained via notebooks/train_fula_tts.ipynb)
-French/English : not yet integrated — returns None (text-only fallback)
-
-Architecture:
-    MALIBA-AI uses a Qwen2-based architecture. We load it with
-    AutoModelForCausalLM + AutoTokenizer, run greedy decoding, and extract
-    the waveform from the model output — matching what BambaraTTSInference does
-    internally without needing the package installed.
+WaxalTTSEngine — lightweight VITS-based TTS for Sahel-Voice-Lab.
+
+Bambara : ynnov/ekodi-bambara-tts-female (VitsModel + AutoTokenizer)
+Fula    : placeholder — returns None until ous-sow/fula-tts is trained
 """
 from __future__ import annotations
 
 import logging
 import os
-import tempfile
 import threading
 from typing import Optional
 
@@ -25,13 +15,23 @@ import numpy as np
 
 logger = logging.getLogger(__name__)
 
-BAMBARA_TTS_REPO = "MALIBA-AI/bambara-tts"
-FULA_TTS_REPO = os.environ.get("FULA_TTS_REPO", "ous-sow/fula-tts")
+BAMBARA_TTS_REPO = os.environ.get("BAMBARA_TTS_REPO", "ynnov/ekodi-bambara-tts-female")
+FULA_TTS_REPO = os.environ.get("FULA_TTS_REPO", "ous-sow/fula-tts")
 HF_TOKEN = os.environ.get("HF_TOKEN")
 
 
+def generate_pular_tts(text: str) -> None:
+    """
+    Placeholder for Fula (Pulaar) TTS.
+    Returns None until ous-sow/fula-tts is trained and pushed to the Hub.
+    Run notebooks/train_fula_tts.ipynb on Kaggle T4 to produce the model.
+    """
+    logger.info("generate_pular_tts: model not yet trained — returning None")
+    return None
+
+
 class WaxalTTSEngine:
-    """Unified TTS engine …"""
+    """Unified TTS engine: Bambara (VITS) + Fula (placeholder)."""
 
     def __init__(self) -> None:
         self._lock = threading.Lock()
@@ -40,18 +40,13 @@ class WaxalTTSEngine:
         self._bam_tokenizer = None
         self._bam_ready = False
         self._bam_error: Optional[str] = None
-        # Fula
-        self._ful_model = None
-        self._ful_tokenizer = None
-        self._ful_ready = False
-        self._ful_error: Optional[str] = None
 
     # ── Public API ────────────────────────────────────────────────────────────
 
     def synthesize(self, text: str, lang: str) -> Optional[tuple[np.ndarray, int]]:
         """
         Returns (audio_float32, sample_rate) or None if TTS unavailable.
-        Never raises — all errors are logged …
+        Never raises — all errors are logged.
         """
         text = text.strip()
         if not text:
@@ -60,7 +55,7 @@ class WaxalTTSEngine:
             if lang == "bam":
                 return self._synthesize_bambara(text)
             elif lang == "ful":
-                return self._synthesize_fula(text)
+                return generate_pular_tts(text)
             else:
                 return None
         except Exception as exc:
@@ -71,45 +66,26 @@ class WaxalTTSEngine:
         bam = "ready" if self._bam_ready else (
             f"error: {self._bam_error}" if self._bam_error else "loading…"
         )
-        ful = "ready" if self._ful_ready else (
-            f"error: {self._ful_error}" if self._ful_error else "not trained yet"
-        )
-        return {"bam": bam, "ful": ful}
+        return {"bam": bam, "ful": "not trained yet"}
 
     def preload(self) -> None:
-        """Start background …"""
+        """Start background thread to load the Bambara model."""
         threading.Thread(target=self._load_bambara, daemon=True).start()
-        threading.Thread(target=self._load_fula, daemon=True).start()
 
-    # ── Bambara (…) ──────────────────────────────────────────────────────────
+    # ── Bambara (ynnov/ekodi-bambara-tts-female, VITS) ───────────────────────
 
     def _load_bambara(self) -> None:
-        """
-        Load MALIBA-AI/bambara-tts directly from HF Hub using transformers.
-        No pip install needed — just model weights downloaded to the HF cache.
-        """
         try:
-            from transformers import AutoModelForCausalLM, AutoTokenizer
-            import torch
-
+            from transformers import VitsModel, AutoTokenizer
             logger.info("WaxalTTS: loading Bambara TTS from %s …", BAMBARA_TTS_REPO)
-            tok = AutoTokenizer.from_pretrained(
-                …
-            )
-            mdl = AutoModelForCausalLM.from_pretrained(
-                BAMBARA_TTS_REPO,
-                token=HF_TOKEN,
-                trust_remote_code=True,
-                torch_dtype=torch.float32,
-            )
+            tok = AutoTokenizer.from_pretrained(BAMBARA_TTS_REPO, token=HF_TOKEN)
+            mdl = VitsModel.from_pretrained(BAMBARA_TTS_REPO, token=HF_TOKEN)
             mdl.eval()
-
             with self._lock:
                 self._bam_tokenizer = tok
                 self._bam_model = mdl
                 self._bam_ready = True
             logger.info("WaxalTTS: Bambara TTS ready")
-
         except Exception as exc:
             self._bam_error = str(exc)
             logger.error("WaxalTTS: Bambara TTS load failed: %s", exc)
@@ -118,89 +94,21 @@ class WaxalTTSEngine:
         if not self._bam_ready:
             self._load_bambara()
         if not self._bam_ready:
-            logger.warning("WaxalTTS: Bambara TTS not ready …")
-            return None
-
-        try:
-            import torch, soundfile as sf
-
-            with self._lock:
-                inputs = self._bam_tokenizer(
-                    text, return_tensors="pt", add_special_tokens=True
-                )
-                with torch.no_grad():
-                    output = self._bam_model.generate(
-                        **inputs,
-                        max_new_tokens=1024,
-                        do_sample=False,
-                    )
-
-            # MALIBA-AI model returns waveform tokens — decode to audio
-            # The model's generate() returns a waveform directly when it has
-            # an audio head; try standard attribute paths.
-            audio = None
-            sr = 16_000
-
-            if hasattr(output, "waveform"):
-                audio = output.waveform[0].cpu().float().numpy()
-            elif hasattr(output, "audio"):
-                audio = output.audio[0].cpu().float().numpy()
-            else:
-                # Fallback: treat output as token ids and use vocoder if present
-                logger.warning(
-                    "WaxalTTS: Bambara model output type %s — expected waveform attribute",
-                    type(output)
-                )
-                return None
-
-            if audio.ndim > 1:
-                audio = audio.mean(axis=1)
-            return audio.astype(np.float32), sr
-
-        except Exception as exc:
-            logger.error("WaxalTTS: Bambara synthesis failed: %s", exc)
-            self._bam_error = str(exc)
-            self._bam_ready = False
-            return None
-
-    # ── Fula (ous-sow/fula-tts, VITS) ────────────────────────────────────────
-
-    def _load_fula(self) -> None:
-        try:
-            from transformers import VitsModel, VitsTokenizer
-            logger.info("WaxalTTS: loading Fula TTS from %s …", FULA_TTS_REPO)
-            tok = VitsTokenizer.from_pretrained(FULA_TTS_REPO, token=HF_TOKEN)
-            mdl = VitsModel.from_pretrained(FULA_TTS_REPO, token=HF_TOKEN)
-            mdl.eval()
-            with self._lock:
-                self._ful_tokenizer = tok
-                self._ful_model = mdl
-                self._ful_ready = True
-            logger.info("WaxalTTS: Fula TTS ready")
-        except Exception as exc:
-            msg = str(exc)
-            if any(k in msg.lower() for k in ("not found", "404", "repository", "does not exist")):
-                self._ful_error = "not trained yet — run notebooks/train_fula_tts.ipynb on Kaggle"
-            else:
-                self._ful_error = msg
-            logger.warning("WaxalTTS: Fula TTS unavailable: %s", self._ful_error)
-
-    def _synthesize_fula(self, text: str) -> Optional[tuple[np.ndarray, int]]:
-        if not self._ful_ready:
-            self._load_fula()
-        if not self._ful_ready:
+            logger.warning("WaxalTTS: Bambara TTS not ready — %s", self._bam_error)
             return None
         try:
             import torch
             with self._lock:
-                inputs = self._ful_tokenizer(text, return_tensors="pt")
+                inputs = self._bam_tokenizer(text, return_tensors="pt")
                 with torch.no_grad():
-                    output = self._ful_model(**inputs)
+                    output = self._bam_model(**inputs)
                 audio = output.waveform[0].cpu().numpy().astype(np.float32)
-            sr = self._ful_model.config.sampling_rate
+            sr = self._bam_model.config.sampling_rate
             return audio, sr
         except Exception as exc:
-            logger.error("WaxalTTS: Fula synthesis failed: %s", exc)
+            logger.error("WaxalTTS: Bambara synthesis failed: %s", exc)
+            self._bam_error = str(exc)
+            self._bam_ready = False
             return None
 
     # ── Utility ───────────────────────────────────────────────────────────────
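
End-to-end usage sketch for the refactored engine (the output path and sample
text are illustrative):

    import soundfile as sf
    from src.tts.waxal_tts import WaxalTTSEngine

    tts = WaxalTTSEngine()
    tts.preload()                         # background-loads the Bambara model
    result = tts.synthesize("I ni ce", "bam")
    if result is not None:
        audio, sr = result
        sf.write("bambara_out.wav", audio, sr)
    else:
        print("TTS unavailable (still loading, load failed, or unsupported language)")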