Spaces:
Running
Running
jefffffff9 Claude Sonnet 4.6 committed on
Commit ·
61e52d7
1
Parent(s): 096b19d
Fix: switch LLM to Qwen2.5-72B-Instruct (Gemma not on HF free tier)
Browse files
google/gemma-3-4b-it requires a paid provider on HF Serverless Inference.
Qwen/Qwen2.5-72B-Instruct is available on the free tier and has better
multilingual coverage for Bambara/Fula context.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- README.md +2 -2
- app_lab.py +2 -2
- src/llm/gemma_client.py +10 -2
README.md
CHANGED
|
@@ -29,7 +29,7 @@ A self-learning voice assistant for Bambara and Fula. Teach it words — it reme
|
|
| 29 |
| Component | Model |
|
| 30 |
|-----------|-------|
|
| 31 |
| STT | `openai/whisper-large-v3-turbo` |
|
| 32 |
-
| LLM | `
|
| 33 |
| TTS | Waxal — Phase 2 |
|
| 34 |
| Memory | HF Dataset `vocabulary.jsonl` |
|
| 35 |
|
|
@@ -47,4 +47,4 @@ A self-learning voice assistant for Bambara and Fula. Teach it words — it reme
|
|
| 47 |
|-----|-------|
|
| 48 |
| `HF_TOKEN` | Your HF write-access token |
|
| 49 |
| `FEEDBACK_REPO_ID` | `ous-sow/sahel-agri-feedback` |
|
| 50 |
-
| `LLM_MODEL_ID` | `
|
|
|
|
| 29 |
| Component | Model |
|
| 30 |
|-----------|-------|
|
| 31 |
| STT | `openai/whisper-large-v3-turbo` |
|
| 32 |
+
| LLM | `Qwen/Qwen2.5-72B-Instruct` (set `LLM_MODEL_ID` env var to override) |
|
| 33 |
| TTS | Waxal — Phase 2 |
|
| 34 |
| Memory | HF Dataset `vocabulary.jsonl` |
|
| 35 |
|
|
|
|
| 47 |
|-----|-------|
|
| 48 |
| `HF_TOKEN` | Your HF write-access token |
|
| 49 |
| `FEEDBACK_REPO_ID` | `ous-sow/sahel-agri-feedback` |
|
| 50 |
+
| `LLM_MODEL_ID` | `Qwen/Qwen2.5-72B-Instruct` (or any HF Serverless-supported model) |
|
app_lab.py
CHANGED
|
@@ -33,7 +33,7 @@ sys.path.insert(0, str(ROOT))
|
|
| 33 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 34 |
FEEDBACK_REPO_ID = os.environ.get("FEEDBACK_REPO_ID", "ous-sow/sahel-agri-feedback")
|
| 35 |
WHISPER_MODEL_ID = os.environ.get("WHISPER_MODEL_ID", "openai/whisper-large-v3-turbo")
|
| 36 |
-
LLM_MODEL_ID = os.environ.get("LLM_MODEL_ID", "
|
| 37 |
|
| 38 |
LANGUAGE_NAMES = {
|
| 39 |
"bam": "Bambara",
|
|
@@ -387,7 +387,7 @@ if __name__ == "__main__":
|
|
| 387 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 388 |
FEEDBACK_REPO_ID = os.environ.get("FEEDBACK_REPO_ID", "ous-sow/sahel-agri-feedback")
|
| 389 |
WHISPER_MODEL_ID = os.environ.get("WHISPER_MODEL_ID", "openai/whisper-large-v3-turbo")
|
| 390 |
-
LLM_MODEL_ID = os.environ.get("LLM_MODEL_ID", "
|
| 391 |
|
| 392 |
_memory._hf_token = HF_TOKEN
|
| 393 |
_memory._repo_id = FEEDBACK_REPO_ID
|
|
|
|
| 33 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 34 |
FEEDBACK_REPO_ID = os.environ.get("FEEDBACK_REPO_ID", "ous-sow/sahel-agri-feedback")
|
| 35 |
WHISPER_MODEL_ID = os.environ.get("WHISPER_MODEL_ID", "openai/whisper-large-v3-turbo")
|
| 36 |
+
LLM_MODEL_ID = os.environ.get("LLM_MODEL_ID", "Qwen/Qwen2.5-72B-Instruct")
|
| 37 |
|
| 38 |
LANGUAGE_NAMES = {
|
| 39 |
"bam": "Bambara",
|
|
|
|
| 387 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 388 |
FEEDBACK_REPO_ID = os.environ.get("FEEDBACK_REPO_ID", "ous-sow/sahel-agri-feedback")
|
| 389 |
WHISPER_MODEL_ID = os.environ.get("WHISPER_MODEL_ID", "openai/whisper-large-v3-turbo")
|
| 390 |
+
LLM_MODEL_ID = os.environ.get("LLM_MODEL_ID", "Qwen/Qwen2.5-72B-Instruct")
|
| 391 |
|
| 392 |
_memory._hf_token = HF_TOKEN
|
| 393 |
_memory._repo_id = FEEDBACK_REPO_ID
|
src/llm/gemma_client.py
CHANGED
|
@@ -9,7 +9,15 @@ The system prompt implements the 'adult-child' logic:
|
|
| 9 |
- It answers QUESTIONS using the vocabulary it has learned
|
| 10 |
|
| 11 |
Model: configurable via LLM_MODEL_ID env var.
|
| 12 |
-
Default:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
"""
|
| 14 |
from __future__ import annotations
|
| 15 |
|
|
@@ -63,7 +71,7 @@ class GemmaClient:
|
|
| 63 |
|
| 64 |
def __init__(
|
| 65 |
self,
|
| 66 |
-
model_id: str = "
|
| 67 |
hf_token: Optional[str] = None,
|
| 68 |
) -> None:
|
| 69 |
self.model_id = model_id
|
|
|
|
| 9 |
- It answers QUESTIONS using the vocabulary it has learned
|
| 10 |
|
| 11 |
Model: configurable via LLM_MODEL_ID env var.
|
| 12 |
+
Default: Qwen/Qwen2.5-72B-Instruct — reliably available on HF Serverless free tier.
|
| 13 |
+
|
| 14 |
+
Tested models that work on HF Serverless (no paid provider needed):
|
| 15 |
+
Qwen/Qwen2.5-72B-Instruct ← default, best quality
|
| 16 |
+
Qwen/Qwen2.5-7B-Instruct ← faster, slightly lower quality
|
| 17 |
+
mistralai/Mistral-7B-Instruct-v0.3
|
| 18 |
+
HuggingFaceH4/zephyr-7b-beta
|
| 19 |
+
|
| 20 |
+
google/gemma-3-4b-it is NOT on the free tier — it requires a paid provider.
|
| 21 |
"""
|
| 22 |
from __future__ import annotations
|
| 23 |
|
|
|
|
| 71 |
|
| 72 |
def __init__(
|
| 73 |
self,
|
| 74 |
+
model_id: str = "Qwen/Qwen2.5-72B-Instruct",
|
| 75 |
hf_token: Optional[str] = None,
|
| 76 |
) -> None:
|
| 77 |
self.model_id = model_id
|