jefffffff9 Claude Sonnet 4.6 committed on
Commit
61e52d7
·
1 Parent(s): 096b19d

Fix: switch LLM to Qwen2.5-72B-Instruct (Gemma not on HF free tier)

Browse files

google/gemma-3-4b-it requires a paid provider on HF Serverless Inference.
Qwen/Qwen2.5-72B-Instruct is available on the free tier and has better
multilingual coverage for Bambara/Fula context.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (3) hide show
  1. README.md +2 -2
  2. app_lab.py +2 -2
  3. src/llm/gemma_client.py +10 -2
README.md CHANGED
@@ -29,7 +29,7 @@ A self-learning voice assistant for Bambara and Fula. Teach it words — it reme
29
  | Component | Model |
30
  |-----------|-------|
31
  | STT | `openai/whisper-large-v3-turbo` |
32
- | LLM | `google/gemma-3-4b-it` (set `LLM_MODEL_ID` env var for Gemma 4) |
33
  | TTS | Waxal — Phase 2 |
34
  | Memory | HF Dataset `vocabulary.jsonl` |
35
 
@@ -47,4 +47,4 @@ A self-learning voice assistant for Bambara and Fula. Teach it words — it reme
47
  |-----|-------|
48
  | `HF_TOKEN` | Your HF write-access token |
49
  | `FEEDBACK_REPO_ID` | `ous-sow/sahel-agri-feedback` |
50
- | `LLM_MODEL_ID` | `google/gemma-3-4b-it` (or Gemma 4 model ID) |
 
29
  | Component | Model |
30
  |-----------|-------|
31
  | STT | `openai/whisper-large-v3-turbo` |
32
+ | LLM | `Qwen/Qwen2.5-72B-Instruct` (set `LLM_MODEL_ID` env var to override) |
33
  | TTS | Waxal — Phase 2 |
34
  | Memory | HF Dataset `vocabulary.jsonl` |
35
 
 
47
  |-----|-------|
48
  | `HF_TOKEN` | Your HF write-access token |
49
  | `FEEDBACK_REPO_ID` | `ous-sow/sahel-agri-feedback` |
50
+ | `LLM_MODEL_ID` | `Qwen/Qwen2.5-72B-Instruct` (or any HF Serverless-supported model) |
app_lab.py CHANGED
@@ -33,7 +33,7 @@ sys.path.insert(0, str(ROOT))
33
  HF_TOKEN = os.environ.get("HF_TOKEN")
34
  FEEDBACK_REPO_ID = os.environ.get("FEEDBACK_REPO_ID", "ous-sow/sahel-agri-feedback")
35
  WHISPER_MODEL_ID = os.environ.get("WHISPER_MODEL_ID", "openai/whisper-large-v3-turbo")
36
- LLM_MODEL_ID = os.environ.get("LLM_MODEL_ID", "google/gemma-3-4b-it")
37
 
38
  LANGUAGE_NAMES = {
39
  "bam": "Bambara",
@@ -387,7 +387,7 @@ if __name__ == "__main__":
387
  HF_TOKEN = os.environ.get("HF_TOKEN")
388
  FEEDBACK_REPO_ID = os.environ.get("FEEDBACK_REPO_ID", "ous-sow/sahel-agri-feedback")
389
  WHISPER_MODEL_ID = os.environ.get("WHISPER_MODEL_ID", "openai/whisper-large-v3-turbo")
390
- LLM_MODEL_ID = os.environ.get("LLM_MODEL_ID", "google/gemma-3-4b-it")
391
 
392
  _memory._hf_token = HF_TOKEN
393
  _memory._repo_id = FEEDBACK_REPO_ID
 
33
  HF_TOKEN = os.environ.get("HF_TOKEN")
34
  FEEDBACK_REPO_ID = os.environ.get("FEEDBACK_REPO_ID", "ous-sow/sahel-agri-feedback")
35
  WHISPER_MODEL_ID = os.environ.get("WHISPER_MODEL_ID", "openai/whisper-large-v3-turbo")
36
+ LLM_MODEL_ID = os.environ.get("LLM_MODEL_ID", "Qwen/Qwen2.5-72B-Instruct")
37
 
38
  LANGUAGE_NAMES = {
39
  "bam": "Bambara",
 
387
  HF_TOKEN = os.environ.get("HF_TOKEN")
388
  FEEDBACK_REPO_ID = os.environ.get("FEEDBACK_REPO_ID", "ous-sow/sahel-agri-feedback")
389
  WHISPER_MODEL_ID = os.environ.get("WHISPER_MODEL_ID", "openai/whisper-large-v3-turbo")
390
+ LLM_MODEL_ID = os.environ.get("LLM_MODEL_ID", "Qwen/Qwen2.5-72B-Instruct")
391
 
392
  _memory._hf_token = HF_TOKEN
393
  _memory._repo_id = FEEDBACK_REPO_ID
src/llm/gemma_client.py CHANGED
@@ -9,7 +9,15 @@ The system prompt implements the 'adult-child' logic:
9
  - It answers QUESTIONS using the vocabulary it has learned
10
 
11
  Model: configurable via LLM_MODEL_ID env var.
12
- Default: google/gemma-3-4b-it (update to Gemma 4 when available on HF Hub)
 
 
 
 
 
 
 
 
13
  """
14
  from __future__ import annotations
15
 
@@ -63,7 +71,7 @@ class GemmaClient:
63
 
64
  def __init__(
65
  self,
66
- model_id: str = "google/gemma-3-4b-it",
67
  hf_token: Optional[str] = None,
68
  ) -> None:
69
  self.model_id = model_id
 
9
  - It answers QUESTIONS using the vocabulary it has learned
10
 
11
  Model: configurable via LLM_MODEL_ID env var.
12
+ Default: Qwen/Qwen2.5-72B-Instruct — reliably available on the HF Serverless free tier.
13
+
14
+ Tested models that work on HF Serverless (no paid provider needed):
15
+ Qwen/Qwen2.5-72B-Instruct ← default, best quality
16
+ Qwen/Qwen2.5-7B-Instruct ← faster, slightly lower quality
17
+ mistralai/Mistral-7B-Instruct-v0.3
18
+ HuggingFaceH4/zephyr-7b-beta
19
+
20
+ google/gemma-3-4b-it is NOT on the free tier — it requires a paid provider.
21
  """
22
  from __future__ import annotations
23
 
 
71
 
72
  def __init__(
73
  self,
74
+ model_id: str = "Qwen/Qwen2.5-72B-Instruct",
75
  hf_token: Optional[str] = None,
76
  ) -> None:
77
  self.model_id = model_id