jefffffff9 Claude Sonnet 4.6 committed on
Commit
61e52d7
·
1 Parent(s): 096b19d

Fix: switch LLM to Qwen2.5-72B-Instruct (Gemma not on HF free tier)

Browse files

google/gemma-3-4b-it requires a paid provider on HF Serverless Inference.
Qwen/Qwen2.5-72B-Instruct is available on the free tier and has better
multilingual coverage for Bambara/Fula context.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (3) hide show
  1. README.md +2 -2
  2. app_lab.py +2 -2
  3. src/llm/gemma_client.py +10 -2
README.md CHANGED
@@ -29,7 +29,7 @@ A self-learning voice assistant for Bambara and Fula. Teach it words — it reme
29
  | Component | Model |
30
  |-----------|-------|
31
  | STT | `openai/whisper-large-v3-turbo` |
32
- | LLM | `google/gemma-3-4b-it` (set `LLM_MODEL_ID` env var for Gemma 4) |
33
  | TTS | Waxal — Phase 2 |
34
  | Memory | HF Dataset `vocabulary.jsonl` |
35
 
@@ -47,4 +47,4 @@ A self-learning voice assistant for Bambara and Fula. Teach it words — it reme
47
  |-----|-------|
48
  | `HF_TOKEN` | Your HF write-access token |
49
  | `FEEDBACK_REPO_ID` | `ous-sow/sahel-agri-feedback` |
50
- | `LLM_MODEL_ID` | `google/gemma-3-4b-it` (or Gemma 4 model ID) |
 
29
  | Component | Model |
30
  |-----------|-------|
31
  | STT | `openai/whisper-large-v3-turbo` |
32
+ | LLM | `Qwen/Qwen2.5-72B-Instruct` (set `LLM_MODEL_ID` env var to override) |
33
  | TTS | Waxal — Phase 2 |
34
  | Memory | HF Dataset `vocabulary.jsonl` |
35
 
 
47
  |-----|-------|
48
  | `HF_TOKEN` | Your HF write-access token |
49
  | `FEEDBACK_REPO_ID` | `ous-sow/sahel-agri-feedback` |
50
+ | `LLM_MODEL_ID` | `Qwen/Qwen2.5-72B-Instruct` (or any HF Serverless-supported model) |
app_lab.py CHANGED
@@ -33,7 +33,7 @@ sys.path.insert(0, str(ROOT))
33
  HF_TOKEN = os.environ.get("HF_TOKEN")
34
  FEEDBACK_REPO_ID = os.environ.get("FEEDBACK_REPO_ID", "ous-sow/sahel-agri-feedback")
35
  WHISPER_MODEL_ID = os.environ.get("WHISPER_MODEL_ID", "openai/whisper-large-v3-turbo")
36
- LLM_MODEL_ID = os.environ.get("LLM_MODEL_ID", "google/gemma-3-4b-it")
37
 
38
  LANGUAGE_NAMES = {
39
  "bam": "Bambara",
@@ -387,7 +387,7 @@ if __name__ == "__main__":
387
  HF_TOKEN = os.environ.get("HF_TOKEN")
388
  FEEDBACK_REPO_ID = os.environ.get("FEEDBACK_REPO_ID", "ous-sow/sahel-agri-feedback")
389
  WHISPER_MODEL_ID = os.environ.get("WHISPER_MODEL_ID", "openai/whisper-large-v3-turbo")
390
- LLM_MODEL_ID = os.environ.get("LLM_MODEL_ID", "google/gemma-3-4b-it")
391
 
392
  _memory._hf_token = HF_TOKEN
393
  _memory._repo_id = FEEDBACK_REPO_ID
 
33
  HF_TOKEN = os.environ.get("HF_TOKEN")
34
  FEEDBACK_REPO_ID = os.environ.get("FEEDBACK_REPO_ID", "ous-sow/sahel-agri-feedback")
35
  WHISPER_MODEL_ID = os.environ.get("WHISPER_MODEL_ID", "openai/whisper-large-v3-turbo")
36
+ LLM_MODEL_ID = os.environ.get("LLM_MODEL_ID", "Qwen/Qwen2.5-72B-Instruct")
37
 
38
  LANGUAGE_NAMES = {
39
  "bam": "Bambara",
 
387
  HF_TOKEN = os.environ.get("HF_TOKEN")
388
  FEEDBACK_REPO_ID = os.environ.get("FEEDBACK_REPO_ID", "ous-sow/sahel-agri-feedback")
389
  WHISPER_MODEL_ID = os.environ.get("WHISPER_MODEL_ID", "openai/whisper-large-v3-turbo")
390
+ LLM_MODEL_ID = os.environ.get("LLM_MODEL_ID", "Qwen/Qwen2.5-72B-Instruct")
391
 
392
  _memory._hf_token = HF_TOKEN
393
  _memory._repo_id = FEEDBACK_REPO_ID
src/llm/gemma_client.py CHANGED
@@ -9,7 +9,15 @@ The system prompt implements the 'adult-child' logic:
9
  - It answers QUESTIONS using the vocabulary it has learned
10
 
11
  Model: configurable via LLM_MODEL_ID env var.
12
- Default: google/gemma-3-4b-it (update to Gemma 4 when available on HF Hub)
 
 
 
 
 
 
 
 
13
  """
14
  from __future__ import annotations
15
 
@@ -63,7 +71,7 @@ class GemmaClient:
63
 
64
  def __init__(
65
  self,
66
- model_id: str = "google/gemma-3-4b-it",
67
  hf_token: Optional[str] = None,
68
  ) -> None:
69
  self.model_id = model_id
 
9
  - It answers QUESTIONS using the vocabulary it has learned
10
 
11
  Model: configurable via LLM_MODEL_ID env var.
12
+ Default: Qwen/Qwen2.5-72B-Instruct — reliably available on the HF Serverless free tier.
13
+
14
+ Tested models that work on HF Serverless (no paid provider needed):
15
+ Qwen/Qwen2.5-72B-Instruct ← default, best quality
16
+ Qwen/Qwen2.5-7B-Instruct ← faster, slightly lower quality
17
+ mistralai/Mistral-7B-Instruct-v0.3
18
+ HuggingFaceH4/zephyr-7b-beta
19
+
20
+ google/gemma-3-4b-it is NOT on the free tier — it requires a paid provider.
21
  """
22
  from __future__ import annotations
23
 
 
71
 
72
  def __init__(
73
  self,
74
+ model_id: str = "Qwen/Qwen2.5-72B-Instruct",
75
  hf_token: Optional[str] = None,
76
  ) -> None:
77
  self.model_id = model_id