jefffffff9 committed on
Commit · 76db545
0 Parent(s)
Initial commit: Sahel-Agri Voice AI
This view is limited to 50 files because it contains too many changes. See raw diff.
- .claude/settings.local.json +7 -0
- .env.example +20 -0
- .gitignore +66 -0
- .vscode/extensions.json +5 -0
- README.md +39 -0
- app.py +611 -0
- configs/api_config.yaml +21 -0
- configs/base_config.yaml +30 -0
- configs/lora_bambara.yaml +19 -0
- configs/lora_fula.yaml +19 -0
- noise_samples/README.md +20 -0
- notebooks/bootstrap_repos.ipynb +308 -0
- notebooks/train_colab.ipynb +283 -0
- packages.txt +1 -0
- requirements.txt +50 -0
- scripts/export_onnx.py +67 -0
- scripts/run_data_pipeline.py +76 -0
- scripts/run_server.py +42 -0
- scripts/train_bambara.py +28 -0
- scripts/train_fula.py +29 -0
- scripts/verify_baseline.py +78 -0
- src/__init__.py +0 -0
- src/api/__init__.py +0 -0
- src/api/app.py +98 -0
- src/api/dependencies.py +20 -0
- src/api/middleware.py +47 -0
- src/api/routes/__init__.py +0 -0
- src/api/routes/health.py +25 -0
- src/api/routes/iot.py +90 -0
- src/api/routes/transcribe.py +74 -0
- src/api/schemas.py +36 -0
- src/data/__init__.py +0 -0
- src/data/agri_dictionary.py +92 -0
- src/data/augmentation.py +84 -0
- src/data/feature_extractor.py +89 -0
- src/data/waxal_loader.py +119 -0
- src/engine/__init__.py +0 -0
- src/engine/adapter_manager.py +106 -0
- src/engine/transcriber.py +132 -0
- src/engine/whisper_base.py +77 -0
- src/iot/__init__.py +0 -0
- src/iot/intent_parser.py +75 -0
- src/iot/sensor_bridge.py +121 -0
- src/iot/voice_responder.py +260 -0
- src/optimization/__init__.py +0 -0
- src/optimization/onnx_exporter.py +106 -0
- src/optimization/quantizer.py +95 -0
- src/optimization/tflite_converter.py +76 -0
- src/training/__init__.py +0 -0
- src/training/callbacks.py +83 -0
.claude/settings.local.json
ADDED
@@ -0,0 +1,7 @@
{
  "permissions": {
    "allow": [
      "Bash(pip show:*)"
    ]
  }
}
.env.example
ADDED
@@ -0,0 +1,20 @@
# HuggingFace read token (required for accessing google/waxal dataset)
HF_TOKEN=hf_your_token_here

# Model
MODEL_ID=openai/whisper-large-v3-turbo

# Adapter paths (relative to project root)
BAMBARA_ADAPTER_PATH=./adapters/bambara
FULA_ADAPTER_PATH=./adapters/fula

# IoT sensor API endpoint (leave empty to use mock data in development)
SENSOR_API_URL=

# FastAPI server
API_HOST=0.0.0.0
API_PORT=8000
LOG_LEVEL=INFO

# Device: "cuda" for GPU, "cpu" for CPU-only
DEVICE=cuda
.gitignore
ADDED
@@ -0,0 +1,66 @@
# Python
__pycache__/
*.py[cod]
*.pyo
*.pyd
.Python
*.egg-info/
dist/
build/
.eggs/

# Environment
.env
venv/
.venv/
env/

# Model weights (large binary files)
*.pt
*.pth
*.bin
*.safetensors
*.ckpt

# ONNX / TFLite exports
*.onnx
*.tflite
models/onnx/
models/tflite/

# HuggingFace cache
data_cache/
.cache/

# Audio noise samples (user must provide their own)
noise_samples/*.wav
noise_samples/*.mp3
noise_samples/*.ogg

# Trained adapters (tracked separately or via DVC)
adapters/bambara/
adapters/fula/

# IDE
.vscode/settings.json
.idea/
*.code-workspace

# OS
.DS_Store
Thumbs.db

# Logs
*.log
logs/

# Local feedback data (audio + corrections live in HF Dataset repo, not git)
feedback/

# Local model downloads
models/

# Pytest
.pytest_cache/
htmlcov/
.coverage
.vscode/extensions.json
ADDED
@@ -0,0 +1,5 @@
{
  "recommendations": [
    "anthropic.claude-code"
  ]
}
README.md
ADDED
@@ -0,0 +1,39 @@
---
title: Sahel-Agri Voice AI
emoji: 🌾
colorFrom: green
colorTo: yellow
sdk: gradio
sdk_version: "4.44.0"
app_file: app.py
hardware: cpu-basic
pinned: false
license: mit
tags:
  - agriculture
  - bambara
  - fula
  - speech-recognition
  - text-to-speech
  - west-africa
  - low-resource-nlp
---

# 🌾 Sahel-Agri Voice AI

Two-way voice assistant for Malian and Guinean farmers. Speak in **Bambara** or **Fula** — get agricultural insights spoken back in your language.

## Features
- 🎙️ Voice input via microphone or file upload
- 🌍 Bambara (bam) and Fula (ful) speech recognition via Whisper + LoRA adapters
- 🔊 Native-language voice responses via Facebook MMS-TTS
- 📊 Soil, weather, irrigation, and pest alerts from IoT sensors
- 💾 Feedback saved to HuggingFace Dataset for continuous improvement

## Languages supported
| Language | STT | TTS |
|----------|-----|-----|
| Bambara (bam) | ✅ Whisper + LoRA | ✅ facebook/mms-tts-bam |
| Fula (ful) | ✅ Whisper + LoRA | ✅ facebook/mms-tts-ful |
| French (fr) | ✅ Whisper | ✅ facebook/mms-tts-fra |
| English (en) | ✅ Whisper | ✅ facebook/mms-tts-eng |
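A note on the TTS column above: the facebook/mms-tts-* checkpoints are plain VITS models, so a response string can be synthesized with the stock transformers API. This is a minimal sketch only; the Space itself wraps this logic in src/tts/mms_tts.py, which is not shown in this commit view, and the example text is illustrative.

# Sketch: synthesize speech from text with an MMS-TTS checkpoint (public transformers API).
import torch
from transformers import AutoTokenizer, VitsModel

tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-bam")
model = VitsModel.from_pretrained("facebook/mms-tts-bam")

text = "I ni ce"  # illustrative input only
inputs = tokenizer(text, return_tensors="pt")
with torch.no_grad():
    waveform = model(**inputs).waveform      # shape: (batch, samples)

sample_rate = model.config.sampling_rate     # 16 kHz for MMS checkpoints
audio = waveform[0].cpu().numpy()            # suitable for gr.Audio as (sample_rate, audio)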
app.py
ADDED
@@ -0,0 +1,611 @@
"""
Sahel-Agri Voice AI — HuggingFace Spaces (ZeroGPU)
Two-way voice assistant: Bambara / Fula / French / English → voice response

Environment variables (set in Space Settings → Secrets):
    HF_TOKEN          — HF write-access token
    FEEDBACK_REPO_ID  — e.g. ous-sow/sahel-agri-feedback (dataset, private)
    ADAPTER_REPO_ID   — e.g. ous-sow/sahel-agri-adapters (model, private)
    WHISPER_MODEL_ID  — default: openai/whisper-large-v3-turbo
                        (use openai/whisper-base for local CPU testing)
"""

from __future__ import annotations

import io
import json
import os
import sys
import tempfile
import threading
from datetime import datetime, timezone
from pathlib import Path

import gradio as gr
import numpy as np

ROOT = Path(__file__).parent
sys.path.insert(0, str(ROOT))

# ── env ──────────────────────────────────────────────────────────────────────
HF_TOKEN = os.environ.get("HF_TOKEN")
FEEDBACK_REPO_ID = os.environ.get("FEEDBACK_REPO_ID", "ous-sow/sahel-agri-feedback")
ADAPTER_REPO_ID = os.environ.get("ADAPTER_REPO_ID", "ous-sow/sahel-agri-adapters")
# whisper-small: ~10s on cpu-basic, good multilingual quality.
# Override via WHISPER_MODEL_ID env var if you upgrade to a GPU Space later.
WHISPER_MODEL_ID = os.environ.get("WHISPER_MODEL_ID", "openai/whisper-small")

# On local CPU (no HF_TOKEN / no spaces package) fall back gracefully
_ON_SPACES = os.environ.get("SPACE_ID") is not None

SUPPORTED_LANGUAGES = {
    "Bambara (bam)": "bam",
    "Fula (ful)": "ful",
    "French / Français": "fr",
    "English": "en",
}

# ── ZeroGPU decorator (no-op locally) ────────────────────────────────────────
try:
    import spaces  # type: ignore
    _gpu = spaces.GPU(duration=55)
except ImportError:
    def _gpu(fn):  # local fallback: plain function
        return fn

# ── Module-level model state (CPU-resident between requests) ─────────────────
_whisper_model = None       # WhisperForConditionalGeneration (base)
_whisper_processor = None
_adapter_manager = None     # AdapterManager (wraps base model with PEFT if adapters loaded)
_model_lock = threading.Lock()
_model_status = "not loaded"
_adapters_loaded = set()    # set of language codes with loaded adapters, e.g. {"bam", "ful"}

from src.tts.mms_tts import MMSTTSEngine
from src.iot.intent_parser import IntentParser
from src.iot.sensor_bridge import SensorBridge
from src.iot.voice_responder import VoiceResponder

_tts = MMSTTSEngine()
_intent_parser = IntentParser()
_sensor_bridge = SensorBridge()

# HF API — only instantiate when token present
_hf_api = None
if HF_TOKEN:
    from huggingface_hub import HfApi
    _hf_api = HfApi(token=HF_TOKEN)


# ── Model loading ─────────────────────────────────────────────────────────────

def _do_load_whisper():
    global _whisper_model, _whisper_processor, _adapter_manager, _model_status
    import torch
    from transformers import WhisperForConditionalGeneration, WhisperProcessor
    from src.engine.adapter_manager import AdapterManager

    _model_status = "loading…"
    try:
        _whisper_processor = WhisperProcessor.from_pretrained(
            WHISPER_MODEL_ID, token=HF_TOKEN
        )
        _whisper_model = WhisperForConditionalGeneration.from_pretrained(
            WHISPER_MODEL_ID,
            torch_dtype=torch.float32,
            token=HF_TOKEN,
        )
        _whisper_model.eval()

        # Create the AdapterManager wrapping the base model
        _adapter_manager = AdapterManager(base_model=_whisper_model, config={})

        # Try to load adapters from the local adapter repo snapshot (if already downloaded)
        _try_load_local_adapters()

        _model_status = f"ready ({WHISPER_MODEL_ID})"
    except Exception as e:
        _model_status = f"error: {e}"


def _try_load_local_adapters() -> None:
    """Load any adapter snapshots that are already on disk (downloaded previously)."""
    global _adapters_loaded
    if _adapter_manager is None:
        return
    if not ADAPTER_REPO_ID:
        return
    try:
        from huggingface_hub import try_to_load_from_cache
        lang_dirs = {"bam": "adapters/bambara", "ful": "adapters/fula"}
        for lang, subdir in lang_dirs.items():
            cached = try_to_load_from_cache(
                repo_id=ADAPTER_REPO_ID,
                filename=f"{subdir}/adapter_config.json",
                repo_type="model",
                token=HF_TOKEN,
            )
            if cached:
                import os
                adapter_path = str(os.path.dirname(cached))
                _adapter_manager.register(lang, adapter_path)
                try:
                    _adapter_manager.load_adapter(lang)
                    _adapters_loaded.add(lang)
                except Exception:
                    pass
    except Exception:
        pass  # Adapters not cached yet — will load after first Hub download


def _ensure_whisper_loaded():
    """Load Whisper to CPU in a background thread on first call. Non-blocking."""
    global _model_status
    with _model_lock:
        if _whisper_model is None and "loading" not in _model_status and "error" not in _model_status:
            t = threading.Thread(target=_do_load_whisper, daemon=True)
            t.start()
    return _model_status


def get_model_status() -> str:
    s = _ensure_whisper_loaded()
    if "ready" in s:
        return f"🟢 {s}"
    if "loading" in s:
        return f"🟡 {s}"
    if "error" in s:
        return f"🔴 {s}"
    return f"⚪ {s}"


# ── Core GPU pipeline ─────────────────────────────────────────────────────────

@_gpu
def _run_pipeline(audio_path: str, language_code: str):
    """
    Full STT → Intent → Sensor → TTS pipeline.
    Decorated with @spaces.GPU(duration=55) on HF Spaces; plain function locally.
    Returns: (transcript, response_text, (sample_rate, wav_np))
    """
    import asyncio
    import torch

    device = "cuda" if torch.cuda.is_available() else "cpu"

    # ── 1. Whisper STT ────────────────────────────────────────────────────────
    if _whisper_model is None:
        return "⏳ Model still loading…", "", None

    import librosa

    audio_np, _ = librosa.load(audio_path, sr=16000, mono=True)

    # Use adapter-wrapped model if an adapter for this language is loaded;
    # otherwise fall back to base Whisper.
    if _adapter_manager is not None and language_code in _adapters_loaded:
        _adapter_manager.activate(language_code)
        active_model = _adapter_manager.get_model()
    else:
        active_model = _whisper_model

    active_model.to(device)
    with _model_lock:
        inputs = _whisper_processor.feature_extractor(
            audio_np, sampling_rate=16000, return_tensors="pt"
        )
        input_features = inputs.input_features.to(device)

    # Bambara and Fula have no Whisper language token — pass None so the model
    # auto-detects or falls back to multilingual decoding.
    if language_code in ("bam", "ful"):
        forced_ids = None
    else:
        forced_ids = _whisper_processor.get_decoder_prompt_ids(
            language=language_code, task="transcribe"
        )

    with torch.no_grad():
        predicted_ids = active_model.generate(
            input_features,
            forced_decoder_ids=forced_ids if forced_ids else None,
            max_new_tokens=256,
        )

    transcript = _whisper_processor.batch_decode(
        predicted_ids, skip_special_tokens=True
    )[0].strip()

    # Free GPU VRAM before TTS
    active_model.to("cpu")
    if device == "cuda":
        torch.cuda.empty_cache()

    # ── 2. Intent + sensor data (CPU) ─────────────────────────────────────────
    intent = _intent_parser.parse(transcript, language=language_code)

    try:
        loop = asyncio.new_event_loop()
        sensor_data = loop.run_until_complete(_sensor_bridge.fetch(intent))
        loop.close()
    except Exception:
        from src.iot.sensor_bridge import SensorData
        sensor_data = SensorData(sensor_type="soil", values={
            "moisture_pct": 45.0, "ph": 6.5, "temperature_c": 28.0
        })

    responder = VoiceResponder(language=language_code)
    response_text = responder.generate_response(intent, sensor_data)

    # ── 3. MMS-TTS (GPU) ──────────────────────────────────────────────────────
    wav_np, sample_rate = _tts.synthesize(response_text, language_code, device=device)

    return transcript, response_text, (sample_rate, wav_np)


# ── HF Hub feedback persistence ───────────────────────────────────────────────

def _save_feedback_to_hub(
    audio_path: str | None,
    transcript: str,
    corrected_text: str,
    response_text: str,
    rating: int,
    notes: str,
    language_label: str,
) -> str:
    language_code = SUPPORTED_LANGUAGES.get(language_label, "bam")

    if not corrected_text.strip():
        return "⚠️ Corrected text is empty."

    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S_%f")

    record = {
        "id": timestamp,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "language": language_code,
        "audio_file": f"audio/{language_code}_{timestamp}.wav",
        "whisper_output": transcript,
        "corrected_text": corrected_text.strip(),
        "response_text": response_text,
        "rating": rating,
        "notes": notes.strip(),
        "is_correction": transcript.strip() != corrected_text.strip(),
        "model": WHISPER_MODEL_ID,
    }

    if _hf_api is None:
        # Local: save to disk instead
        fb_dir = ROOT / "feedback"
        fb_dir.mkdir(exist_ok=True)
        (fb_dir / "audio").mkdir(exist_ok=True)
        corrections_path = fb_dir / "corrections.jsonl"
        if audio_path:
            import shutil
            shutil.copy2(audio_path, fb_dir / "audio" / f"{language_code}_{timestamp}.wav")
        with open(corrections_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(record, ensure_ascii=False) + "\n")
        total = sum(1 for _ in open(corrections_path, encoding="utf-8"))
        return f"✅ Saved locally (#{total}) — HF_TOKEN not set, Hub upload skipped."

    try:
        # Upload audio
        if audio_path:
            _hf_api.upload_file(
                path_or_fileobj=audio_path,
                path_in_repo=f"audio/{language_code}_{timestamp}.wav",
                repo_id=FEEDBACK_REPO_ID,
                repo_type="dataset",
            )

        # Download → append → re-upload corrections.jsonl (with retry on conflict)
        from huggingface_hub import hf_hub_download
        for attempt in range(2):
            try:
                local_jsonl = hf_hub_download(
                    repo_id=FEEDBACK_REPO_ID,
                    filename="corrections.jsonl",
                    repo_type="dataset",
                    token=HF_TOKEN,
                )
                with open(local_jsonl, encoding="utf-8") as f:
                    existing = f.read()
            except Exception:
                existing = ""

            updated = existing + json.dumps(record, ensure_ascii=False) + "\n"
            buf = io.BytesIO(updated.encode("utf-8"))

            try:
                _hf_api.upload_file(
                    path_or_fileobj=buf,
                    path_in_repo="corrections.jsonl",
                    repo_id=FEEDBACK_REPO_ID,
                    repo_type="dataset",
                )
                break
            except Exception as e:
                if attempt == 1:
                    return f"⚠️ Audio uploaded but corrections.jsonl update failed: {e}"

        total = updated.count("\n")
        return f"✅ Saved to Hub (#{total}) — {FEEDBACK_REPO_ID}"

    except Exception as e:
        return f"❌ Hub upload error: {e}"


# ── Adapter reload ────────────────────────────────────────────────────────────

def _reload_adapters_from_hub() -> str:
    global _adapters_loaded
    if _hf_api is None:
        return "⚠️ HF_TOKEN not set — cannot download adapters."
    if _adapter_manager is None:
        return "⏳ Base model not loaded yet — wait for model to finish loading and try again."
    try:
        from huggingface_hub import snapshot_download
        local_dir = snapshot_download(
            repo_id=ADAPTER_REPO_ID, repo_type="model", token=HF_TOKEN
        )
        results = []
        for lang, subdir in (("bam", "adapters/bambara"), ("ful", "adapters/fula")):
            adapter_path = Path(local_dir) / subdir
            if not adapter_path.exists():
                results.append(f"⚠️ {lang}: `{subdir}` not found in repo")
                continue
            # Check that this looks like a valid PEFT adapter
            if not (adapter_path / "adapter_config.json").exists():
                results.append(f"⚠️ {lang}: `{subdir}` missing adapter_config.json — run training first")
                continue
            try:
                _adapter_manager.register(lang, str(adapter_path))
                _adapter_manager.load_adapter(lang)
                _adapters_loaded.add(lang)
                results.append(f"✅ {lang}: adapter loaded from `{subdir}`")
            except Exception as e:
                results.append(f"❌ {lang}: load failed — {e}")

        summary = "\n".join(results)
        active = ", ".join(_adapters_loaded) if _adapters_loaded else "none"
        return f"{summary}\n\n**Active adapters:** {active}\n**Repo:** `{ADAPTER_REPO_ID}`"
    except Exception as e:
        return f"❌ Adapter reload failed: {e}"


def _get_adapter_status() -> str:
    lines = []

    # Show which adapters are currently active in memory
    if _adapters_loaded:
        lines.append(f"**Active adapters (in memory):** {', '.join(sorted(_adapters_loaded))}")
    else:
        lines.append("**Active adapters:** none — using base Whisper")

    if _hf_api is None:
        lines.append("_HF_TOKEN not set — Hub check skipped._")
        return "\n".join(lines)

    try:
        from huggingface_hub import list_repo_files
        files = list(list_repo_files(ADAPTER_REPO_ID, repo_type="model", token=HF_TOKEN))
        bam_ok = any("bambara" in f and "adapter_config" in f for f in files)
        ful_ok = any("fula" in f and "adapter_config" in f for f in files)
        lines += [
            f"\n**Hub repo:** `{ADAPTER_REPO_ID}`",
            f"- Bambara (bam): {'✅ trained adapter present' if bam_ok else '⚠️ not yet trained — run bootstrap notebook'}",
            f"- Fula (ful): {'✅ trained adapter present' if ful_ok else '⚠️ not yet trained — run bootstrap notebook'}",
        ]
        if bam_ok or ful_ok:
            lines.append("\n_Click **Reload Adapters** to activate them._")
    except Exception as e:
        lines.append(f"_Could not read Hub repo: {e}_")

    return "\n".join(lines)


# ── Main ask handler ──────────────────────────────────────────────────────────

def handle_ask(audio_path, language_label):
    if audio_path is None:
        return "⚠️ No audio — press Record or upload a file.", "", None

    language_code = SUPPORTED_LANGUAGES.get(language_label, "bam")
    status = _ensure_whisper_loaded()

    if _whisper_model is None:
        return f"⏳ Model loading ({status}). Wait a moment and try again.", "", None

    try:
        transcript, response_text, audio_out = _run_pipeline(audio_path, language_code)
        return transcript, response_text, audio_out
    except Exception as e:
        return f"❌ {e}", "", None


# ── Gradio UI ─────────────────────────────────────────────────────────────────

def build_ui() -> gr.Blocks:
    with gr.Blocks(title="Sahel-Agri Voice AI") as demo:
        gr.Markdown("# 🌾 Sahel-Agri Voice AI")
        gr.Markdown(
            "Speak in **Bambara** or **Fula** — get agricultural insights spoken back "
            "in your language. Also supports French and English."
        )

        model_status_box = gr.Textbox(
            value=get_model_status,
            label="Model status",
            interactive=False,
            every=3,
        )

        with gr.Tabs():

            # ── Tab 1: Voice Assistant ────────────────────────────────────────
            with gr.TabItem("🎙️ Voice Assistant"):
                with gr.Row():
                    with gr.Column(scale=1):
                        language_dd = gr.Dropdown(
                            choices=list(SUPPORTED_LANGUAGES.keys()),
                            value="Bambara (bam)",
                            label="Language / Kan",
                        )
                        audio_input = gr.Audio(
                            sources=["microphone", "upload"],
                            type="filepath",
                            label="Record or upload audio",
                        )
                        ask_btn = gr.Button("▶ Ask / Ɲinɛ", variant="primary")

                    with gr.Column(scale=1):
                        transcript_box = gr.Textbox(
                            label="Whisper heard",
                            lines=3,
                            placeholder="Your words will appear here…",
                            interactive=False,
                        )
                        response_box = gr.Textbox(
                            label="Response / Jaabi",
                            lines=3,
                            placeholder="Agricultural advice will appear here…",
                            interactive=False,
                        )
                        audio_output = gr.Audio(
                            label="Voice response",
                            autoplay=True,
                            interactive=False,
                        )

                ask_btn.click(
                    fn=handle_ask,
                    inputs=[audio_input, language_dd],
                    outputs=[transcript_box, response_box, audio_output],
                )

            # ── Tab 2: Feedback & Correction ─────────────────────────────────
            with gr.TabItem("📝 Feedback & Correction"):
                gr.Markdown(
                    "Help improve the model by correcting transcription errors. "
                    "Your audio and corrections are saved to the training dataset."
                )
                with gr.Row():
                    with gr.Column():
                        fb_lang = gr.Dropdown(
                            choices=list(SUPPORTED_LANGUAGES.keys()),
                            value="Bambara (bam)",
                            label="Language",
                        )
                        fb_audio = gr.Audio(
                            sources=["microphone", "upload"],
                            type="filepath",
                            label="Audio (re-record or upload)",
                        )
                        fb_transcript = gr.Textbox(
                            label="Whisper output (what it heard)",
                            lines=3,
                            placeholder="Paste or type what Whisper said…",
                        )
                        fb_corrected = gr.Textbox(
                            label="Corrected transcription (what was actually said)",
                            lines=3,
                            placeholder="Type the correct text here…",
                        )

                    with gr.Column():
                        fb_response = gr.Textbox(
                            label="Response text (optional — for rating)",
                            lines=2,
                            placeholder="Copy the response from Tab 1…",
                        )
                        fb_rating = gr.Slider(
                            minimum=1, maximum=5, step=1, value=3,
                            label="Response quality (1 = poor, 5 = excellent)",
                        )
                        fb_notes = gr.Textbox(
                            label="Notes (optional)",
                            lines=2,
                            placeholder="e.g. noisy background, strong accent…",
                        )
                        save_btn = gr.Button("💾 Save to Dataset", variant="secondary")
                        save_status = gr.Textbox(
                            label="Save status", interactive=False, lines=2
                        )

                save_btn.click(
                    fn=_save_feedback_to_hub,
                    inputs=[
                        fb_audio, fb_transcript, fb_corrected,
                        fb_response, fb_rating, fb_notes, fb_lang,
                    ],
                    outputs=[save_status],
                )

            # ── Tab 3: Training Status ────────────────────────────────────────
            with gr.TabItem("🔧 Training Status"):
                gr.Markdown(
                    "After collecting ≥10 corrections per language, run the training "
                    "notebook on Google Colab (free GPU), then reload adapters here."
                )
                adapter_status_md = gr.Markdown(value=_get_adapter_status())
                reload_btn = gr.Button("🔄 Reload Adapters from Hub")
                reload_out = gr.Markdown()

                gr.Markdown("---")
                gr.Markdown(
                    "**Training notebook**: "
                    "`notebooks/train_colab.ipynb` — open in Colab, run all cells."
                )
                gr.Markdown(
                    "**Feedback dataset**: "
                    f"`{FEEDBACK_REPO_ID}` (private, auto-updated on each save)"
                )
                gr.Markdown(
                    "**Adapter repo**: "
                    f"`{ADAPTER_REPO_ID}` (private, updated after each training run)"
                )

                reload_btn.click(
                    fn=_reload_adapters_from_hub,
                    outputs=[reload_out],
                )
                reload_btn.click(
                    fn=_get_adapter_status,
                    outputs=[adapter_status_md],
                )

    return demo


# ── Entry point ───────────────────────────────────────────────────────────────

if __name__ == "__main__":
    from dotenv import load_dotenv
    load_dotenv()

    # Re-read env after dotenv
    HF_TOKEN = os.environ.get("HF_TOKEN")
    FEEDBACK_REPO_ID = os.environ.get("FEEDBACK_REPO_ID", "ous-sow/sahel-agri-feedback")
    ADAPTER_REPO_ID = os.environ.get("ADAPTER_REPO_ID", "ous-sow/sahel-agri-adapters")
    WHISPER_MODEL_ID = os.environ.get("WHISPER_MODEL_ID", "openai/whisper-small")

    if HF_TOKEN:
        from huggingface_hub import HfApi
        _hf_api = HfApi(token=HF_TOKEN)

    # Kick off background model load immediately
    _ensure_whisper_loaded()

    print(f"Whisper model : {WHISPER_MODEL_ID}")
    print(f"Feedback repo : {FEEDBACK_REPO_ID}")
    print(f"Adapter repo  : {ADAPTER_REPO_ID}")
    print(f"HF_TOKEN set  : {'yes' if HF_TOKEN else 'no (local-only mode)'}")
    print()

    demo = build_ui()
    demo.launch(
        server_port=9001,
        inbrowser=True,
        share=False,
    )
configs/api_config.yaml
ADDED
@@ -0,0 +1,21 @@
server:
  host: "0.0.0.0"
  port: 8000
  workers: 1  # Single worker: shares GPU model in memory
  timeout_keep_alive: 30

inference:
  default_language: "bam"
  max_audio_size_mb: 10
  supported_languages:
    - "bam"
    - "ful"

iot:
  sensor_poll_timeout_s: 5
  response_language: "fr"  # French for farmer-facing TTS output
  intent_confidence_threshold: 0.7

rate_limit:
  requests_per_minute: 60
  burst: 10
configs/base_config.yaml
ADDED
@@ -0,0 +1,30 @@
model:
  id: "openai/whisper-large-v3-turbo"
  task: "transcribe"
  max_new_tokens: 128
  chunk_length_s: 30

training:
  output_dir: "./adapters"
  per_device_train_batch_size: 4
  gradient_accumulation_steps: 4
  warmup_steps: 200
  max_steps: 4000
  save_steps: 500
  eval_steps: 500
  learning_rate: 1.0e-4
  fp16: true
  # CRITICAL on Windows: multiprocessing spawn breaks with tokenizers
  dataloader_num_workers: 0

audio:
  sample_rate: 16000
  max_duration_s: 30
  noise_snr_db_range: [5, 20]
  augmentation_prob: 0.6

paths:
  data_cache: "./data_cache"
  adapters: "./adapters"
  models: "./models"
  noise_samples: "./noise_samples"
configs/lora_bambara.yaml
ADDED
@@ -0,0 +1,19 @@
language: "bam"
language_code: "bm"  # ISO 639-1 code used for Whisper forced_decoder_ids
dataset_subset: "bam"
adapter_name: "bambara"
output_dir: "./adapters/bambara"

lora:
  r: 32
  lora_alpha: 64
  target_modules:
    - "q_proj"
    - "v_proj"
    - "k_proj"
    - "out_proj"
    - "fc1"
    - "fc2"
  lora_dropout: 0.05
  bias: "none"
  task_type: "SEQ_2_SEQ_LM"
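The lora block above maps one-to-one onto a PEFT LoraConfig. A minimal sketch of consuming it with the standard peft/transformers API; only the library calls are standard, the surrounding variable names and the whisper-small base are illustrative assumptions:

# Sketch: build a LoRA-wrapped Whisper model from configs/lora_bambara.yaml.
import yaml
from peft import LoraConfig, get_peft_model
from transformers import WhisperForConditionalGeneration

with open("configs/lora_bambara.yaml") as f:
    cfg = yaml.safe_load(f)["lora"]

lora_config = LoraConfig(
    r=cfg["r"],
    lora_alpha=cfg["lora_alpha"],
    target_modules=cfg["target_modules"],
    lora_dropout=cfg["lora_dropout"],
    bias=cfg["bias"],
    task_type=cfg["task_type"],  # "SEQ_2_SEQ_LM"
)

base = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
model = get_peft_model(base, lora_config)
model.print_trainable_parameters()  # only the LoRA matrices are trainable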
configs/lora_fula.yaml
ADDED
@@ -0,0 +1,19 @@
language: "ful"
language_code: "ff"  # ISO 639-1 code used for Whisper forced_decoder_ids
dataset_subset: "ful"
adapter_name: "fula"
output_dir: "./adapters/fula"

lora:
  r: 16  # Smaller rank — Fula dataset is smaller than Bambara
  lora_alpha: 32
  target_modules:
    - "q_proj"
    - "v_proj"
    - "k_proj"
    - "out_proj"
    - "fc1"
    - "fc2"
  lora_dropout: 0.05
  bias: "none"
  task_type: "SEQ_2_SEQ_LM"
noise_samples/README.md
ADDED
@@ -0,0 +1,20 @@
# Field Noise Samples

Place `.wav` audio files here to enable realistic field-noise augmentation during training.

## Required Files (16kHz mono, any duration ≥5s)
- `tractor_engine.wav` — diesel tractor idling or working
- `wind_field.wav` — wind in open farmland
- `livestock_ambient.wav` — cattle, goats, or chickens in background

## Suggested Sources
- [Freesound.org](https://freesound.org) — search "tractor", "wind field", "livestock ambient" (filter by CC0 / CC-BY)
- Field recordings from partner NGOs or agricultural organizations in Mali/Guinea

## Licensing Note
Ensure all audio files are licensed for use in ML training datasets.
CC0 (public domain) or CC-BY are preferred.

## Without Noise Files
The augmenter will fall back to Gaussian noise only.
Training will still work but model robustness to real-world conditions may be reduced.
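For reference, mixing one of these noise files into a clean clip at a target SNR takes only a few lines. This is a generic sketch, not the project's src/data/augmentation.py (which is not shown in this view); the clip path is hypothetical and the SNR range follows noise_snr_db_range in configs/base_config.yaml:

# Sketch: add field noise to a clean 16 kHz clip at a random SNR in [5, 20] dB,
# falling back to Gaussian noise when no noise file is available (as described above).
import numpy as np
import librosa

def add_noise(clean: np.ndarray, snr_db: float, noise_path: str | None) -> np.ndarray:
    if noise_path:
        noise, _ = librosa.load(noise_path, sr=16000, mono=True)
        reps = int(np.ceil(len(clean) / len(noise)))
        noise = np.tile(noise, reps)[: len(clean)]   # tile/trim to clip length
    else:
        noise = np.random.randn(len(clean)).astype(np.float32)

    clean_power = np.mean(clean ** 2) + 1e-10
    noise_power = np.mean(noise ** 2) + 1e-10
    # Scale noise so 10*log10(clean_power / scaled_noise_power) == snr_db
    scale = np.sqrt(clean_power / (noise_power * 10 ** (snr_db / 10)))
    return clean + scale * noise

clean, _ = librosa.load("some_clip.wav", sr=16000, mono=True)   # hypothetical path
noisy = add_noise(clean, np.random.uniform(5, 20), "noise_samples/tractor_engine.wav")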
notebooks/bootstrap_repos.ipynb
ADDED
@@ -0,0 +1,308 @@
{
 "nbformat": 4,
 "nbformat_minor": 5,
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.10.0"
  },
  "colab": {
   "provenance": [],
   "gpuType": "T4"
  },
  "accelerator": "GPU"
 },
 "cells": [
  {
   "cell_type": "markdown",
   "id": "cell-title",
   "metadata": {},
   "source": [
    "# 🌾 Sahel-Agri Voice AI — One-Time Bootstrap\n",
    "\n",
    "**Run this notebook ONCE** before deploying your Space. It:\n",
    "\n",
    "1. Creates the three HuggingFace repos (`sahel-agri-feedback`, `sahel-agri-adapters`, `sahel-agri-voice`)\n",
    "2. Seeds the feedback dataset with a `corrections.jsonl` placeholder\n",
    "3. Trains v0 LoRA adapters for **Bambara** and **Fula** on the full Google Waxal dataset\n",
    "4. Pushes adapters to `ous-sow/sahel-agri-adapters`\n",
    "\n",
    "After this notebook completes, push your project code to the Space and your app will start\n",
    "with working Bambara/Fula speech recognition from day 1 — **no user corrections needed yet**.\n",
    "\n",
    "For subsequent improvement runs (after collecting farmer feedback), use `train_colab.ipynb`.\n",
    "\n",
    "---\n",
    "**Before running:** Runtime → Change runtime type → **T4 GPU**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-gpu-check",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 1 — GPU check\n",
    "import subprocess\n",
    "result = subprocess.run(['nvidia-smi'], capture_output=True, text=True)\n",
    "if result.returncode != 0:\n",
    "    raise RuntimeError('No GPU! Runtime → Change runtime type → T4 GPU')\n",
    "print(result.stdout[:500])\n",
    "print('✅ GPU ready')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-install",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 2 — Install dependencies\n",
    "!pip install -q \\\n",
    "    torch==2.11.0 torchaudio==2.11.0 \\\n",
    "    transformers==5.5.0 datasets==4.8.4 \\\n",
    "    accelerate==1.13.0 evaluate==0.4.2 \\\n",
    "    huggingface-hub==1.9.0 peft==0.18.1 \\\n",
    "    librosa==0.10.2 soundfile==0.12.1 \\\n",
    "    jiwer==3.0.4 pyyaml==6.0.2\n",
    "print('✅ Packages installed')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-hf-login",
   "metadata": {},
   "outputs": [],
   "source": "# Cell 3 — HuggingFace login\n# Colab: 🔑 icon (left sidebar) → Add new secret → name=HF_TOKEN\nimport os\ntry:\n    from google.colab import userdata  # type: ignore\n    HF_TOKEN = userdata.get('HF_TOKEN')\nexcept Exception:\n    HF_TOKEN = os.environ.get('HF_TOKEN', '')\n\nif not HF_TOKEN:\n    raise ValueError(\n        'HF_TOKEN not found.\\n'\n        'Colab: click the 🔑 icon → Add new secret → name=HF_TOKEN'\n    )\n\nfrom huggingface_hub import login, HfApi\nlogin(token=HF_TOKEN, add_to_git_credential=False)\napi = HfApi(token=HF_TOKEN)\n\nHF_USERNAME = 'ous-sow'\nFEEDBACK_REPO_ID = f'{HF_USERNAME}/sahel-agri-feedback'\nADAPTER_REPO_ID = f'{HF_USERNAME}/sahel-agri-adapters'\nSPACE_REPO_ID = f'{HF_USERNAME}/sahel-agri-voice'\n# whisper-small trains on Colab T4 in ~25 min and runs on CPU in ~10s.\n# Change to 'openai/whisper-large-v3-turbo' only if you upgrade to a GPU Space.\nWHISPER_MODEL_ID = 'openai/whisper-small'\n\nprint(f'✅ Logged in as {HF_USERNAME}')"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-create-repos",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 4 — Create HuggingFace repos (skips if they already exist)\n",
    "from huggingface_hub import RepoUrl\n",
    "\n",
    "def create_repo_if_missing(repo_id, repo_type, private=True):\n",
    "    try:\n",
    "        url = api.create_repo(\n",
    "            repo_id=repo_id,\n",
    "            repo_type=repo_type,\n",
    "            private=private,\n",
    "            exist_ok=True,\n",
    "        )\n",
    "        print(f'  ✅ {repo_type}: {repo_id}')\n",
    "        return url\n",
    "    except Exception as e:\n",
    "        print(f'  ⚠️ {repo_id}: {e}')\n",
    "\n",
    "print('Creating repos...')\n",
    "create_repo_if_missing(FEEDBACK_REPO_ID, 'dataset', private=True)\n",
    "create_repo_if_missing(ADAPTER_REPO_ID, 'model', private=True)\n",
    "create_repo_if_missing(SPACE_REPO_ID, 'space', private=False)\n",
    "\n",
    "# Seed the feedback dataset with an empty corrections.jsonl\n",
    "import io\n",
    "try:\n",
    "    api.upload_file(\n",
    "        path_or_fileobj=io.BytesIO(b''),\n",
    "        path_in_repo='corrections.jsonl',\n",
    "        repo_id=FEEDBACK_REPO_ID,\n",
    "        repo_type='dataset',\n",
    "        commit_message='Init: empty corrections.jsonl',\n",
    "    )\n",
    "    print(f'  ✅ {FEEDBACK_REPO_ID}/corrections.jsonl initialised')\n",
    "except Exception as e:\n",
    "    print(f'  ⚠️ corrections.jsonl upload: {e} (may already exist)')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-clone-space",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 5 — Clone Space code (so we can use src/ and configs/)\n",
    "# If the Space is brand new and has no code yet, clone from the local zip instead.\n",
    "import sys\n",
    "from pathlib import Path\n",
    "from huggingface_hub import snapshot_download\n",
    "\n",
    "try:\n",
    "    space_dir = Path(snapshot_download(\n",
    "        repo_id=SPACE_REPO_ID, repo_type='space', token=HF_TOKEN\n",
    "    ))\n",
    "    print(f'Space code: {space_dir}')\n",
    "except Exception as e:\n",
    "    print(f'Could not download Space ({e})')\n",
    "    print('Uploading project code to Space first...')\n",
    "    # If you have the project on Colab already (e.g. mounted Drive), set:\n",
    "    #   space_dir = Path('/content/drive/MyDrive/voice-model')\n",
    "    # Otherwise upload via git (see README step 6) and re-run this cell.\n",
    "    raise RuntimeError(\n",
    "        'Push your project to the Space first:\\n'\n",
    "        '  git remote add space https://huggingface.co/spaces/ous-sow/sahel-agri-voice\\n'\n",
    "        '  git push space main\\n'\n",
    "        'Then re-run this notebook.'\n",
    "    )\n",
    "\n",
    "sys.path.insert(0, str(space_dir))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-train-bam",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 6 — Train v0 Bambara adapter on full Waxal (bam)\n",
    "#\n",
    "# Uses streaming — Waxal is ~4h of audio, we cap at 2000 samples for Colab budget.\n",
    "# Full training (~4000 steps) on the entire dataset: use a Kaggle P100 (12h limit).\n",
    "import os, yaml\n",
    "os.environ['HF_TOKEN'] = HF_TOKEN\n",
    "\n",
    "from src.training.trainer import WhisperLoRATrainer\n",
    "\n",
    "WAXAL_CAP = 2000  # raise to 10000+ on Kaggle for a stronger v0 model\n",
    "\n",
    "base_cfg = str(space_dir / 'configs' / 'base_config.yaml')\n",
    "bam_cfg_src = str(space_dir / 'configs' / 'lora_bambara.yaml')\n",
    "bam_out = '/tmp/sahel_adapter_bam'\n",
    "\n",
    "# Override output_dir\n",
    "with open(bam_cfg_src) as f:\n",
    "    bam_config = yaml.safe_load(f)\n",
    "bam_config['output_dir'] = bam_out\n",
    "tmp_bam_cfg = '/tmp/lora_bam.yaml'\n",
    "with open(tmp_bam_cfg, 'w') as f:\n",
    "    yaml.dump(bam_config, f)\n",
    "\n",
    "# Also override max_steps in base config to match Waxal cap\n",
    "with open(base_cfg) as f:\n",
    "    base_config = yaml.safe_load(f)\n",
    "# ~2 steps per sample @ batch_size=4, gradient_acc=4\n",
    "base_config['training']['max_steps'] = max(500, WAXAL_CAP // 8)\n",
    "tmp_base_cfg = '/tmp/base_config.yaml'\n",
    "with open(tmp_base_cfg, 'w') as f:\n",
    "    yaml.dump(base_config, f)\n",
    "\n",
    "print(f'Training Bambara v0 adapter (Waxal cap={WAXAL_CAP}, max_steps={base_config[\"training\"][\"max_steps\"]})...')\n",
    "trainer_bam = WhisperLoRATrainer(\n",
    "    base_config_path=tmp_base_cfg,\n",
    "    language_config_path=tmp_bam_cfg,\n",
    ")\n",
    "trainer_bam.setup()\n",
    "\n",
    "# No feedback yet — materialise Waxal and train\n",
    "trainer_bam.merge_extra_data([], repeat=1, waxal_cap=WAXAL_CAP)\n",
    "\n",
    "trainer_bam.train()\n",
    "print(f'✅ Bambara v0 adapter saved to {bam_out}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-train-ful",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 7 — Train v0 Fula adapter on full Waxal (ful)\n",
    "ful_cfg_src = str(space_dir / 'configs' / 'lora_fula.yaml')\n",
    "ful_out = '/tmp/sahel_adapter_ful'\n",
    "\n",
    "with open(ful_cfg_src) as f:\n",
    "    ful_config = yaml.safe_load(f)\n",
    "ful_config['output_dir'] = ful_out\n",
    "tmp_ful_cfg = '/tmp/lora_ful.yaml'\n",
    "with open(tmp_ful_cfg, 'w') as f:\n",
    "    yaml.dump(ful_config, f)\n",
    "\n",
    "print(f'Training Fula v0 adapter (Waxal cap={WAXAL_CAP})...')\n",
    "trainer_ful = WhisperLoRATrainer(\n",
    "    base_config_path=tmp_base_cfg,\n",
    "    language_config_path=tmp_ful_cfg,\n",
    ")\n",
    "trainer_ful.setup()\n",
    "trainer_ful.merge_extra_data([], repeat=1, waxal_cap=WAXAL_CAP)\n",
    "trainer_ful.train()\n",
    "print(f'✅ Fula v0 adapter saved to {ful_out}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-push-adapters",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 8 — Push both adapters to HF Model repo\n",
    "from huggingface_hub import HfApi\n",
    "api = HfApi(token=HF_TOKEN)\n",
    "\n",
    "for lang, out_dir, path_in_repo in [\n",
    "    ('bam', bam_out, 'adapters/bambara'),\n",
    "    ('ful', ful_out, 'adapters/fula'),\n",
    "]:\n",
    "    api.upload_folder(\n",
    "        folder_path=out_dir,\n",
    "        repo_id=ADAPTER_REPO_ID,\n",
    "        repo_type='model',\n",
    "        path_in_repo=path_in_repo,\n",
    "        commit_message=f'v0 {lang} adapter trained on Waxal (cap={WAXAL_CAP} samples)',\n",
    "    )\n",
    "    print(f'✅ {lang} → {ADAPTER_REPO_ID}/{path_in_repo}')\n",
    "\n",
    "print()\n",
    "print('Bootstrap complete!')\n",
    "print()\n",
    "print('Next steps:')\n",
    "print('  1. Push your project code to the Space (git push space main)')\n",
    "print('  2. In Space Settings → Secrets, add HF_TOKEN, FEEDBACK_REPO_ID, ADAPTER_REPO_ID')\n",
    "print('  3. Space will build — your app at https://huggingface.co/spaces/ous-sow/sahel-agri-voice')\n",
    "print('  4. Tab 3 → Reload Adapters — Bambara + Fula adapters will be loaded')\n",
    "print('  5. Collect farmer corrections, then run train_colab.ipynb to keep improving')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-verify",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 9 — Quick verification: list what was pushed to the adapter repo\n",
    "from huggingface_hub import list_repo_files\n",
    "\n",
    "files = sorted(list_repo_files(ADAPTER_REPO_ID, repo_type='model', token=HF_TOKEN))\n",
    "print(f'Files in {ADAPTER_REPO_ID}:')\n",
    "for f in files:\n",
    "    print(f'  {f}')\n",
    "\n",
    "bam_ok = any('bambara/adapter_config.json' in f for f in files)\n",
    "ful_ok = any('fula/adapter_config.json' in f for f in files)\n",
    "print()\n",
    "print(f'Bambara adapter: {\"✅\" if bam_ok else \"❌\"}')\n",
    "print(f'Fula adapter:    {\"✅\" if ful_ok else \"❌\"}')\n",
    "\n",
    "if bam_ok and ful_ok:\n",
    "    print('\\n🎉 Both adapters ready. Your Space will use them automatically on the next reload.')\n",
    "else:\n",
    "    print('\\n⚠️ Some adapters are missing — check the training cells above for errors.')"
   ]
  }
 ]
}
notebooks/train_colab.ipynb
ADDED
|
@@ -0,0 +1,283 @@
|
| 1 |
+
{
|
| 2 |
+
"nbformat": 4,
|
| 3 |
+
"nbformat_minor": 5,
|
| 4 |
+
"metadata": {
|
| 5 |
+
"kernelspec": {
|
| 6 |
+
"display_name": "Python 3",
|
| 7 |
+
"language": "python",
|
| 8 |
+
"name": "python3"
|
| 9 |
+
},
|
| 10 |
+
"language_info": {
|
| 11 |
+
"name": "python",
|
| 12 |
+
"version": "3.10.0"
|
| 13 |
+
},
|
| 14 |
+
"colab": {
|
| 15 |
+
"provenance": [],
|
| 16 |
+
"gpuType": "T4"
|
| 17 |
+
},
|
| 18 |
+
"accelerator": "GPU"
|
| 19 |
+
},
|
| 20 |
+
"cells": [
|
| 21 |
+
{
|
| 22 |
+
"cell_type": "markdown",
|
| 23 |
+
"id": "cell-title",
|
| 24 |
+
"metadata": {},
|
| 25 |
+
"source": [
|
| 26 |
+
"# 🌾 Sahel-Agri Voice AI — Fine-tune on Farmer Feedback\n",
|
| 27 |
+
"\n",
|
| 28 |
+
"**Run after collecting ≥10 corrections in the Space.** \n",
|
| 29 |
+
"First run? Use `bootstrap_repos.ipynb` instead to train the v0 Waxal adapter.\n",
|
| 30 |
+
"\n",
|
| 31 |
+
"This notebook fine-tunes the existing LoRA adapter using:\n",
|
| 32 |
+
"- **Waxal baseline** (up to 500 samples) — keeps the model grounded\n",
|
| 33 |
+
"- **Farmer corrections** (3× upsampled) — targeted improvement from real field use\n",
|
| 34 |
+
"\n",
|
| 35 |
+
"**Before running:** Runtime → Change runtime type → **T4 GPU**"
|
| 36 |
+
]
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"cell_type": "code",
|
| 40 |
+
"execution_count": null,
|
| 41 |
+
"id": "cell-gpu-check",
|
| 42 |
+
"metadata": {},
|
| 43 |
+
"outputs": [],
|
| 44 |
+
"source": [
|
| 45 |
+
"# Cell 1 — GPU check\n",
|
| 46 |
+
"import subprocess\n",
|
| 47 |
+
"result = subprocess.run(['nvidia-smi'], capture_output=True, text=True)\n",
|
| 48 |
+
"if result.returncode != 0:\n",
|
| 49 |
+
" raise RuntimeError('No GPU! Runtime → Change runtime type → T4 GPU')\n",
|
| 50 |
+
"print(result.stdout[:500])\n",
|
| 51 |
+
"print('✅ GPU ready')"
|
| 52 |
+
]
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"cell_type": "code",
|
| 56 |
+
"execution_count": null,
|
| 57 |
+
"id": "cell-install",
|
| 58 |
+
"metadata": {},
|
| 59 |
+
"outputs": [],
|
| 60 |
+
"source": [
|
| 61 |
+
"# Cell 2 — Install dependencies (matching Space versions)\n",
|
| 62 |
+
"!pip install -q \\\n",
|
| 63 |
+
" torch==2.11.0 torchaudio==2.11.0 \\\n",
|
| 64 |
+
" transformers==5.5.0 datasets==4.8.4 \\\n",
|
| 65 |
+
" accelerate==1.13.0 evaluate==0.4.2 \\\n",
|
| 66 |
+
" huggingface-hub==1.9.0 peft==0.18.1 \\\n",
|
| 67 |
+
" librosa==0.10.2 soundfile==0.12.1 \\\n",
|
| 68 |
+
" jiwer==3.0.4 pyyaml==6.0.2\n",
|
| 69 |
+
"print('✅ Packages installed')"
|
| 70 |
+
]
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"cell_type": "code",
|
| 74 |
+
"execution_count": null,
|
| 75 |
+
"id": "cell-hf-login",
|
| 76 |
+
"metadata": {},
|
| 77 |
+
"outputs": [],
|
| 78 |
+
"source": "# Cell 3 — HuggingFace login\n# Colab: 🔑 icon (left sidebar) → Add new secret → name=HF_TOKEN\n# Kaggle: Add Data → add as Kaggle secret named HF_TOKEN\nimport os\ntry:\n from google.colab import userdata # type: ignore\n HF_TOKEN = userdata.get('HF_TOKEN')\nexcept Exception:\n HF_TOKEN = os.environ.get('HF_TOKEN', '')\n\nif not HF_TOKEN:\n raise ValueError('HF_TOKEN not found — see instructions above.')\n\nfrom huggingface_hub import login\nlogin(token=HF_TOKEN, add_to_git_credential=False)\n\nSPACE_REPO_ID = 'ous-sow/sahel-agri-voice'\nFEEDBACK_REPO_ID = 'ous-sow/sahel-agri-feedback'\nADAPTER_REPO_ID = 'ous-sow/sahel-agri-adapters'\n# Must match what the Space uses — whisper-small for cpu-basic, whisper-large-v3-turbo for GPU.\nWHISPER_MODEL_ID = 'openai/whisper-small'\nTRAIN_LANG = 'bam' # ← change to 'ful' for Fula\n\nprint(f'✅ Logged in | training language: {TRAIN_LANG}')"
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"cell_type": "code",
|
| 82 |
+
"execution_count": null,
|
| 83 |
+
"id": "cell-download",
|
| 84 |
+
"metadata": {},
|
| 85 |
+
"outputs": [],
|
| 86 |
+
"source": [
|
| 87 |
+
"# Cell 4 — Download Space code and feedback corrections\n",
|
| 88 |
+
"import json, shutil, sys\n",
|
| 89 |
+
"from pathlib import Path\n",
|
| 90 |
+
"from huggingface_hub import snapshot_download, hf_hub_download\n",
|
| 91 |
+
"\n",
|
| 92 |
+
"# Get Space code (contains src/, configs/)\n",
|
| 93 |
+
"space_dir = Path(snapshot_download(\n",
|
| 94 |
+
" repo_id=SPACE_REPO_ID, repo_type='space', token=HF_TOKEN\n",
|
| 95 |
+
"))\n",
|
| 96 |
+
"sys.path.insert(0, str(space_dir))\n",
|
| 97 |
+
"print(f'Space code: {space_dir}')\n",
|
| 98 |
+
"\n",
|
| 99 |
+
"# Download feedback corrections.jsonl\n",
|
| 100 |
+
"jsonl_path = hf_hub_download(\n",
|
| 101 |
+
" repo_id=FEEDBACK_REPO_ID,\n",
|
| 102 |
+
" filename='corrections.jsonl',\n",
|
| 103 |
+
" repo_type='dataset',\n",
|
| 104 |
+
" token=HF_TOKEN,\n",
|
| 105 |
+
")\n",
|
| 106 |
+
"with open(jsonl_path, encoding='utf-8') as f:\n",
|
| 107 |
+
" all_records = [json.loads(l) for l in f if l.strip()]\n",
|
| 108 |
+
"\n",
|
| 109 |
+
"corrections = [\n",
|
| 110 |
+
" r for r in all_records\n",
|
| 111 |
+
" if r.get('is_correction') and r['language'] == TRAIN_LANG\n",
|
| 112 |
+
"]\n",
|
| 113 |
+
"print(f'Total feedback records : {len(all_records)}')\n",
|
| 114 |
+
"print(f'Corrections for {TRAIN_LANG} : {len(corrections)}')\n",
|
| 115 |
+
"\n",
|
| 116 |
+
"if len(corrections) < 5:\n",
|
| 117 |
+
" print('⚠️ Very few corrections — consider collecting more before training.')\n",
|
| 118 |
+
" print(' Training will proceed with Waxal only (corrections will be skipped).')"
|
| 119 |
+
]
|
| 120 |
+
},
|
| 121 |
+
{
|
| 122 |
+
"cell_type": "code",
|
| 123 |
+
"execution_count": null,
|
| 124 |
+
"id": "cell-download-audio",
|
| 125 |
+
"metadata": {},
|
| 126 |
+
"outputs": [],
|
| 127 |
+
"source": [
|
| 128 |
+
"# Cell 5 — Download feedback audio files from HF Dataset repo\n",
|
| 129 |
+
"fb_audio_dir = Path('/tmp/sahel_feedback_audio')\n",
|
| 130 |
+
"fb_audio_dir.mkdir(exist_ok=True)\n",
|
| 131 |
+
"\n",
|
| 132 |
+
"skipped = 0\n",
|
| 133 |
+
"for rec in corrections:\n",
|
| 134 |
+
" local_path = fb_audio_dir / Path(rec['audio_file']).name\n",
|
| 135 |
+
" if local_path.exists():\n",
|
| 136 |
+
" continue\n",
|
| 137 |
+
" try:\n",
|
| 138 |
+
" dl = hf_hub_download(\n",
|
| 139 |
+
" repo_id=FEEDBACK_REPO_ID,\n",
|
| 140 |
+
" filename=rec['audio_file'],\n",
|
| 141 |
+
" repo_type='dataset',\n",
|
| 142 |
+
" token=HF_TOKEN,\n",
|
| 143 |
+
" )\n",
|
| 144 |
+
" shutil.copy(dl, local_path)\n",
|
| 145 |
+
" except Exception as e:\n",
|
| 146 |
+
" skipped += 1\n",
|
| 147 |
+
" print(f' skip {rec[\"audio_file\"]}: {e}')\n",
|
| 148 |
+
"\n",
|
| 149 |
+
"# Point records at local paths\n",
|
| 150 |
+
"for rec in corrections:\n",
|
| 151 |
+
" local = fb_audio_dir / Path(rec['audio_file']).name\n",
|
| 152 |
+
" if local.exists():\n",
|
| 153 |
+
" rec['audio_file'] = str(local)\n",
|
| 154 |
+
"\n",
|
| 155 |
+
"available = [r for r in corrections if Path(r['audio_file']).exists()]\n",
|
| 156 |
+
"print(f'Downloaded {len(available)} / {len(corrections)} audio files (skipped {skipped})')"
|
| 157 |
+
]
|
| 158 |
+
},
|
| 159 |
+
{
|
| 160 |
+
"cell_type": "code",
|
| 161 |
+
"execution_count": null,
|
| 162 |
+
"id": "cell-train",
|
| 163 |
+
"metadata": {},
|
| 164 |
+
"outputs": [],
|
| 165 |
+
"source": [
|
| 166 |
+
"# Cell 6 — Fine-tune: Waxal baseline + farmer corrections\n",
|
| 167 |
+
"#\n",
|
| 168 |
+
"# WhisperLoRATrainer.setup() loads Waxal (streaming).\n",
|
| 169 |
+
"# merge_extra_data() materialises Waxal (up to 500 samples),\n",
|
| 170 |
+
"# appends corrections (3× upsampled), shuffles the combined dataset.\n",
|
| 171 |
+
"# train() runs standard Seq2SeqTrainer on the merged dataset.\n",
|
| 172 |
+
"\n",
|
| 173 |
+
"import os\n",
|
| 174 |
+
"os.environ['HF_TOKEN'] = HF_TOKEN\n",
|
| 175 |
+
"\n",
|
| 176 |
+
"from src.training.trainer import WhisperLoRATrainer\n",
|
| 177 |
+
"\n",
|
| 178 |
+
"lang_config_map = {'bam': 'lora_bambara.yaml', 'ful': 'lora_fula.yaml'}\n",
|
| 179 |
+
"base_cfg = str(space_dir / 'configs' / 'base_config.yaml')\n",
|
| 180 |
+
"lang_cfg = str(space_dir / 'configs' / lang_config_map[TRAIN_LANG])\n",
|
| 181 |
+
"output_dir = f'/tmp/sahel_adapter_{TRAIN_LANG}'\n",
|
| 182 |
+
"\n",
|
| 183 |
+
"# Override output_dir so adapter saves to /tmp on Colab\n",
|
| 184 |
+
"import yaml\n",
|
| 185 |
+
"with open(lang_cfg) as f:\n",
|
| 186 |
+
" lang_config = yaml.safe_load(f)\n",
|
| 187 |
+
"lang_config['output_dir'] = output_dir\n",
|
| 188 |
+
"tmp_lang_cfg = f'/tmp/lora_{TRAIN_LANG}_tmp.yaml'\n",
|
| 189 |
+
"with open(tmp_lang_cfg, 'w') as f:\n",
|
| 190 |
+
" yaml.dump(lang_config, f)\n",
|
| 191 |
+
"\n",
|
| 192 |
+
"trainer = WhisperLoRATrainer(\n",
|
| 193 |
+
" base_config_path=base_cfg,\n",
|
| 194 |
+
" language_config_path=tmp_lang_cfg,\n",
|
| 195 |
+
")\n",
|
| 196 |
+
"trainer.setup()\n",
|
| 197 |
+
"\n",
|
| 198 |
+
"if available:\n",
|
| 199 |
+
" print(f'Merging {len(available)} corrections (×3) with Waxal baseline (cap=500)...')\n",
|
| 200 |
+
" trainer.merge_extra_data(available, repeat=3, waxal_cap=500)\n",
|
| 201 |
+
"else:\n",
|
| 202 |
+
" print('No corrections available — training on Waxal only.')\n",
|
| 203 |
+
"\n",
|
| 204 |
+
"trainer.train()\n",
|
| 205 |
+
"print(f'✅ Training complete — adapter at {output_dir}')"
|
| 206 |
+
]
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"cell_type": "code",
|
| 210 |
+
"execution_count": null,
|
| 211 |
+
"id": "cell-push",
|
| 212 |
+
"metadata": {},
|
| 213 |
+
"outputs": [],
|
| 214 |
+
"source": [
|
| 215 |
+
"# Cell 7 — Push adapter to HF Model repo\n",
|
| 216 |
+
"from huggingface_hub import HfApi\n",
|
| 217 |
+
"api = HfApi(token=HF_TOKEN)\n",
|
| 218 |
+
"\n",
|
| 219 |
+
"path_in_repo = 'adapters/bambara' if TRAIN_LANG == 'bam' else 'adapters/fula'\n",
|
| 220 |
+
"n_corrections = len(available)\n",
|
| 221 |
+
"\n",
|
| 222 |
+
"api.upload_folder(\n",
|
| 223 |
+
" folder_path=output_dir,\n",
|
| 224 |
+
" repo_id=ADAPTER_REPO_ID,\n",
|
| 225 |
+
" repo_type='model',\n",
|
| 226 |
+
" path_in_repo=path_in_repo,\n",
|
| 227 |
+
" commit_message=(\n",
|
| 228 |
+
" f'Fine-tune {TRAIN_LANG}: Waxal baseline + {n_corrections} farmer corrections'\n",
|
| 229 |
+
" ),\n",
|
| 230 |
+
")\n",
|
| 231 |
+
"print(f'✅ Pushed to {ADAPTER_REPO_ID}/{path_in_repo}')\n",
|
| 232 |
+
"print('\\nNext: Space → Tab 3 → Reload Adapters from Hub')"
|
| 233 |
+
]
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"cell_type": "code",
|
| 237 |
+
"execution_count": null,
|
| 238 |
+
"id": "cell-sanity",
|
| 239 |
+
"metadata": {},
|
| 240 |
+
"outputs": [],
|
| 241 |
+
"source": [
|
| 242 |
+
"# Cell 8 — Sanity check: compare WER before vs after adapter\n",
|
| 243 |
+
"import random, torch, librosa, jiwer\n",
|
| 244 |
+
"from transformers import WhisperForConditionalGeneration, WhisperProcessor\n",
|
| 245 |
+
"from peft import PeftModel\n",
|
| 246 |
+
"\n",
|
| 247 |
+
"if not available:\n",
|
| 248 |
+
" print('No test samples — skipping sanity check.')\n",
|
| 249 |
+
"else:\n",
|
| 250 |
+
" test_rec = random.choice(available)\n",
|
| 251 |
+
" print(f'Audio : {Path(test_rec[\"audio_file\"]).name}')\n",
|
| 252 |
+
" print(f'Expected : {test_rec[\"corrected_text\"]}')\n",
|
| 253 |
+
" print(f'Pre-train: {test_rec[\"whisper_output\"]}')\n",
|
| 254 |
+
"\n",
|
| 255 |
+
" # Load base + adapter\n",
|
| 256 |
+
" processor = WhisperProcessor.from_pretrained(WHISPER_MODEL_ID, token=HF_TOKEN)\n",
|
| 257 |
+
" base = WhisperForConditionalGeneration.from_pretrained(\n",
|
| 258 |
+
" WHISPER_MODEL_ID, torch_dtype=torch.float16, token=HF_TOKEN\n",
|
| 259 |
+
" ).to('cuda')\n",
|
| 260 |
+
" model = PeftModel.from_pretrained(base, output_dir).eval()\n",
|
| 261 |
+
"\n",
|
| 262 |
+
" audio_np, _ = librosa.load(test_rec['audio_file'], sr=16000, mono=True)\n",
|
| 263 |
+
" feats = processor.feature_extractor(\n",
|
| 264 |
+
" audio_np, sampling_rate=16000, return_tensors='pt'\n",
|
| 265 |
+
" ).input_features.half().to('cuda')\n",
|
| 266 |
+
"\n",
|
| 267 |
+
" with torch.no_grad():\n",
|
| 268 |
+
" ids = model.generate(feats, max_new_tokens=256)\n",
|
| 269 |
+
" result = processor.batch_decode(ids, skip_special_tokens=True)[0].strip()\n",
|
| 270 |
+
" print(f'Post-train: {result}')\n",
|
| 271 |
+
"\n",
|
| 272 |
+
" ref = test_rec['corrected_text']\n",
|
| 273 |
+
" wer_before = jiwer.wer(ref, test_rec['whisper_output']) if test_rec.get('whisper_output') else 1.0\n",
|
| 274 |
+
" wer_after = jiwer.wer(ref, result)\n",
|
| 275 |
+
" print(f'\\nWER before: {wer_before:.1%} → WER after: {wer_after:.1%}')\n",
|
| 276 |
+
" if wer_after < wer_before:\n",
|
| 277 |
+
" print('✅ Adapter improved transcription quality!')\n",
|
| 278 |
+
" else:\n",
|
| 279 |
+
" print('ℹ️ No improvement on this single sample — collect more corrections and retrain.')"
|
| 280 |
+
]
|
| 281 |
+
}
|
| 282 |
+
]
|
| 283 |
+
}
|
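Cell 6 above relies on WhisperLoRATrainer.merge_extra_data() to blend the Waxal baseline with the farmer corrections. The trainer source is not part of this diff, so the snippet below is only a sketch of what the cell's own comments describe (cap Waxal at 500 samples, repeat the corrections 3x, shuffle); the helper name and record shapes are assumptions, not the actual implementation:

    import random

    def merge_waxal_and_corrections(waxal_samples, corrections, repeat=3, waxal_cap=500, seed=42):
        """Illustrative merge: cap the baseline, upsample field corrections, shuffle."""
        merged = list(waxal_samples)[:waxal_cap]   # Waxal keeps the model grounded
        for _ in range(repeat):                    # 3x weight on real farmer corrections
            merged.extend(corrections)
        random.Random(seed).shuffle(merged)
        return merged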
packages.txt
ADDED
|
@@ -0,0 +1 @@
|
| 1 |
+
ffmpeg
|
requirements.txt
ADDED
|
@@ -0,0 +1,50 @@
|
| 1 |
+
# -----------------------------------------------------------------------------
|
| 2 |
+
# Sahel-Agri Voice AI — Python Dependencies
|
| 3 |
+
# HuggingFace Spaces (ZeroGPU) deployment — CUDA pre-installed, no +cu128 suffix
|
| 4 |
+
#
|
| 5 |
+
# Local CPU test:
|
| 6 |
+
# pip install -r requirements.txt
|
| 7 |
+
# -----------------------------------------------------------------------------
|
| 8 |
+
|
| 9 |
+
# PyTorch (CPU build — works on HF Spaces cpu-basic and locally)
|
| 10 |
+
torch==2.11.0
|
| 11 |
+
torchaudio==2.11.0
|
| 12 |
+
|
| 13 |
+
# HuggingFace core
|
| 14 |
+
transformers==5.5.0
|
| 15 |
+
datasets==4.8.4
|
| 16 |
+
accelerate==1.13.0
|
| 17 |
+
evaluate==0.4.2
|
| 18 |
+
huggingface-hub==1.9.0
|
| 19 |
+
|
| 20 |
+
# PEFT (LoRA adapters)
|
| 21 |
+
peft==0.18.1
|
| 22 |
+
|
| 23 |
+
# Audio processing
|
| 24 |
+
librosa==0.10.2
|
| 25 |
+
soundfile==0.12.1
|
| 26 |
+
audiomentations==0.43.1
|
| 27 |
+
|
| 28 |
+
# Quantization (CPU: installs fine; 4-bit/8-bit requires GPU at runtime)
|
| 29 |
+
bitsandbytes==0.49.2
|
| 30 |
+
|
| 31 |
+
# Metrics
|
| 32 |
+
jiwer==3.0.4
|
| 33 |
+
|
| 34 |
+
# Config & environment
|
| 35 |
+
pyyaml==6.0.2
|
| 36 |
+
python-dotenv==1.1.0
|
| 37 |
+
|
| 38 |
+
# Gradio (must match sdk_version in README.md)
|
| 39 |
+
gradio==4.44.0
|
| 40 |
+
|
| 41 |
+
# Pydantic v2
|
| 42 |
+
pydantic==2.11.3
|
| 43 |
+
|
| 44 |
+
# Testing
|
| 45 |
+
pytest==8.3.5
|
| 46 |
+
pytest-asyncio==0.26.0
|
| 47 |
+
|
| 48 |
+
# Utilities
|
| 49 |
+
numpy==2.2.4
|
| 50 |
+
scipy==1.15.2
|
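Note that the FastAPI server under src/api/ and scripts/run_server.py imports packages that are not pinned above (fastapi and slowapi in the source, uvicorn as the entry point, python-multipart for UploadFile/Form parsing); the Gradio Space itself does not need them. A sketch of the extra lines to append for the API deployment; version pins are deliberately omitted because this commit does not specify them:

    # FastAPI inference server (scripts/run_server.py), not required by the Gradio Space
    fastapi
    uvicorn[standard]
    python-multipart
    slowapi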
scripts/export_onnx.py
ADDED
|
@@ -0,0 +1,67 @@
|
| 1 |
+
"""
|
| 2 |
+
Phase 4a: Merge LoRA adapters and export language-specific ONNX models.
|
| 3 |
+
Validates that ONNX WER is within 2% of PyTorch baseline.
|
| 4 |
+
|
| 5 |
+
Usage:
|
| 6 |
+
python scripts/export_onnx.py
|
| 7 |
+
"""
|
| 8 |
+
import logging
|
| 9 |
+
import os
|
| 10 |
+
import sys
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
|
| 13 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 14 |
+
|
| 15 |
+
from dotenv import load_dotenv
|
| 16 |
+
|
| 17 |
+
load_dotenv()
|
| 18 |
+
|
| 19 |
+
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s — %(message)s")
|
| 20 |
+
|
| 21 |
+
import yaml
|
| 22 |
+
|
| 23 |
+
from src.optimization.onnx_exporter import ONNXExporter
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def export_language(language: str, adapter_path: str, config: dict) -> None:
|
| 27 |
+
from peft import PeftModel
|
| 28 |
+
from transformers import WhisperForConditionalGeneration, WhisperProcessor
|
| 29 |
+
|
| 30 |
+
hf_token = os.getenv("HF_TOKEN")
|
| 31 |
+
model_id = config["model"]["id"]
|
| 32 |
+
|
| 33 |
+
print(f"\n[{language.upper()}] Loading base model...")
|
| 34 |
+
base_model = WhisperForConditionalGeneration.from_pretrained(model_id, token=hf_token)
|
| 35 |
+
processor = WhisperProcessor.from_pretrained(model_id, token=hf_token)
|
| 36 |
+
|
| 37 |
+
print(f"[{language.upper()}] Loading adapter from {adapter_path}...")
|
| 38 |
+
peft_model = PeftModel.from_pretrained(base_model, adapter_path, adapter_name=language)
|
| 39 |
+
|
| 40 |
+
output_dir = f"{config['paths']['models']}/onnx/{language}"
|
| 41 |
+
exporter = ONNXExporter()
|
| 42 |
+
result_path = exporter.merge_and_export(peft_model, processor, output_dir, language)
|
| 43 |
+
print(f"[{language.upper()}] ONNX exported to: {result_path}")
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def main() -> None:
|
| 47 |
+
with open("configs/base_config.yaml") as f:
|
| 48 |
+
config = yaml.safe_load(f)
|
| 49 |
+
|
| 50 |
+
print("=" * 60)
|
| 51 |
+
print("Sahel-Agri Voice AI — ONNX Export")
|
| 52 |
+
print("=" * 60)
|
| 53 |
+
|
| 54 |
+
bambara_path = os.getenv("BAMBARA_ADAPTER_PATH", "./adapters/bambara")
|
| 55 |
+
fula_path = os.getenv("FULA_ADAPTER_PATH", "./adapters/fula")
|
| 56 |
+
|
| 57 |
+
for language, adapter_path in [("bambara", bambara_path), ("fula", fula_path)]:
|
| 58 |
+
if Path(adapter_path).exists():
|
| 59 |
+
export_language(language, adapter_path, config)
|
| 60 |
+
else:
|
| 61 |
+
print(f"\nSkipping {language}: adapter not found at {adapter_path}")
|
| 62 |
+
|
| 63 |
+
print("\nExport complete.")
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
if __name__ == "__main__":
|
| 67 |
+
main()
|
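The docstring above promises that ONNX WER stays within 2% of the PyTorch baseline, but the script itself only exports the models. A minimal sketch of how that check could be wired in afterwards, assuming you already hold reference transcripts and hypotheses from both backends as lists of strings (the helper name and the 2-percentage-point reading of the threshold are assumptions):

    import jiwer

    def check_onnx_wer_regression(references, pytorch_hyps, onnx_hyps, max_delta=0.02):
        """Fail loudly if the ONNX export degrades WER by more than the allowed budget."""
        wer_pt = jiwer.wer(references, pytorch_hyps)
        wer_onnx = jiwer.wer(references, onnx_hyps)
        delta = wer_onnx - wer_pt
        print(f"PyTorch WER {wer_pt:.1%} | ONNX WER {wer_onnx:.1%} | delta {delta:+.1%}")
        if delta > max_delta:
            raise RuntimeError(f"ONNX WER regression {delta:.1%} exceeds {max_delta:.0%} budget")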
scripts/run_data_pipeline.py
ADDED
|
@@ -0,0 +1,76 @@
|
| 1 |
+
"""
|
| 2 |
+
Phase 2: Download google/waxal, apply augmentation, print statistics.
|
| 3 |
+
Streams examples and caches to data_cache/ as Arrow files.
|
| 4 |
+
|
| 5 |
+
Usage:
|
| 6 |
+
python scripts/run_data_pipeline.py --subset bam --max-examples 100
|
| 7 |
+
"""
|
| 8 |
+
import argparse
|
| 9 |
+
import sys
|
| 10 |
+
import time
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
|
| 13 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 14 |
+
|
| 15 |
+
import os
|
| 16 |
+
|
| 17 |
+
from dotenv import load_dotenv
|
| 18 |
+
|
| 19 |
+
load_dotenv()
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def main(subset: str, max_examples: int) -> None:
|
| 23 |
+
import yaml
|
| 24 |
+
from transformers import WhisperProcessor
|
| 25 |
+
|
| 26 |
+
from src.data.augmentation import FieldNoiseAugmenter
|
| 27 |
+
from src.data.waxal_loader import WaxalDataLoader
|
| 28 |
+
|
| 29 |
+
with open("configs/base_config.yaml") as f:
|
| 30 |
+
config = yaml.safe_load(f)
|
| 31 |
+
|
| 32 |
+
hf_token = os.getenv("HF_TOKEN")
|
| 33 |
+
model_id = config["model"]["id"]
|
| 34 |
+
|
| 35 |
+
print("=" * 60)
|
| 36 |
+
print(f"Waxal Data Pipeline — subset: {subset}")
|
| 37 |
+
print("=" * 60)
|
| 38 |
+
|
| 39 |
+
print(f"\n[1/4] Loading WhisperProcessor ({model_id})...")
|
| 40 |
+
processor = WhisperProcessor.from_pretrained(model_id, token=hf_token)
|
| 41 |
+
|
| 42 |
+
print("[2/4] Initializing augmenter...")
|
| 43 |
+
augmenter = FieldNoiseAugmenter(config["paths"]["noise_samples"], config)
|
| 44 |
+
print(f" Augmenter ready: {augmenter.is_ready()}")
|
| 45 |
+
|
| 46 |
+
print(f"[3/4] Streaming google/waxal subset={subset}...")
|
| 47 |
+
loader = WaxalDataLoader(subset, config, hf_token=hf_token)
|
| 48 |
+
|
| 49 |
+
t0 = time.time()
|
| 50 |
+
count = 0
|
| 51 |
+
total_duration = 0.0
|
| 52 |
+
|
| 53 |
+
for example in loader.iter_processed(processor, split="train", augmenter=augmenter):
|
| 54 |
+
count += 1
|
| 55 |
+
# input_features shape: (80, 3000) = 30 seconds at most
|
| 56 |
+
# Upper-bound estimate: count each processed example as one padded 30-second chunk
|
| 57 |
+
total_duration += 30.0 # max chunk
|
| 58 |
+
if count >= max_examples:
|
| 59 |
+
break
|
| 60 |
+
|
| 61 |
+
elapsed = time.time() - t0
|
| 62 |
+
|
| 63 |
+
print(f"\n[4/4] Results:")
|
| 64 |
+
print(f" Examples processed: {count}")
|
| 65 |
+
print(f" Approx total audio: {total_duration / 3600:.2f} hours")
|
| 66 |
+
print(f" Processing time: {elapsed:.1f}s")
|
| 67 |
+
print(f" Throughput: {count / elapsed:.1f} examples/sec")
|
| 68 |
+
print(f"\nData pipeline PASSED.")
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
if __name__ == "__main__":
|
| 72 |
+
parser = argparse.ArgumentParser()
|
| 73 |
+
parser.add_argument("--subset", default="bam", choices=["bam", "ful"])
|
| 74 |
+
parser.add_argument("--max-examples", type=int, default=50)
|
| 75 |
+
args = parser.parse_args()
|
| 76 |
+
main(args.subset, args.max_examples)
|
scripts/run_server.py
ADDED
|
@@ -0,0 +1,42 @@
|
| 1 |
+
"""
|
| 2 |
+
Phase 4b: Start the FastAPI inference server.
|
| 3 |
+
|
| 4 |
+
Usage:
|
| 5 |
+
python scripts/run_server.py
|
| 6 |
+
python scripts/run_server.py --host 0.0.0.0 --port 8000
|
| 7 |
+
"""
|
| 8 |
+
import argparse
|
| 9 |
+
import sys
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 13 |
+
|
| 14 |
+
from dotenv import load_dotenv
|
| 15 |
+
|
| 16 |
+
load_dotenv()
|
| 17 |
+
|
| 18 |
+
import uvicorn
|
| 19 |
+
|
| 20 |
+
if __name__ == "__main__":
|
| 21 |
+
parser = argparse.ArgumentParser(description="Start Sahel-Agri Voice AI server")
|
| 22 |
+
parser.add_argument("--host", default="0.0.0.0")
|
| 23 |
+
parser.add_argument("--port", type=int, default=8000)
|
| 24 |
+
parser.add_argument("--reload", action="store_true", help="Enable hot-reload (dev only)")
|
| 25 |
+
args = parser.parse_args()
|
| 26 |
+
|
| 27 |
+
print(f"Starting server on http://{args.host}:{args.port}")
|
| 28 |
+
print("Endpoints:")
|
| 29 |
+
print(f" GET http://localhost:{args.port}/api/v1/health")
|
| 30 |
+
print(f" POST http://localhost:{args.port}/api/v1/transcribe")
|
| 31 |
+
print(f" POST http://localhost:{args.port}/api/v1/query")
|
| 32 |
+
print(f" GET http://localhost:{args.port}/docs (Swagger UI)")
|
| 33 |
+
print()
|
| 34 |
+
|
| 35 |
+
uvicorn.run(
|
| 36 |
+
"src.api.app:app",
|
| 37 |
+
host=args.host,
|
| 38 |
+
port=args.port,
|
| 39 |
+
workers=1, # Single worker: GPU model shared in memory
|
| 40 |
+
reload=args.reload,
|
| 41 |
+
log_level="info",
|
| 42 |
+
)
|
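With the server running, the endpoints can be exercised from a short client script. A sketch using the requests package (not pinned in requirements.txt) against a local sample.wav; the file name and field values are placeholders:

    import requests

    with open("sample.wav", "rb") as f:
        resp = requests.post(
            "http://localhost:8000/api/v1/transcribe",
            files={"audio_file": ("sample.wav", f, "audio/wav")},
            data={"language": "bam"},  # or "ful" for Fula
            timeout=120,
        )
    resp.raise_for_status()
    print(resp.json()["text"])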
scripts/train_bambara.py
ADDED
|
@@ -0,0 +1,28 @@
|
| 1 |
+
"""
|
| 2 |
+
Phase 3a: Fine-tune LoRA adapter for Bambara (bam).
|
| 3 |
+
|
| 4 |
+
Usage:
|
| 5 |
+
python scripts/train_bambara.py
|
| 6 |
+
"""
|
| 7 |
+
import logging
|
| 8 |
+
import sys
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 12 |
+
|
| 13 |
+
from dotenv import load_dotenv
|
| 14 |
+
|
| 15 |
+
load_dotenv()
|
| 16 |
+
|
| 17 |
+
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s — %(message)s")
|
| 18 |
+
|
| 19 |
+
from src.training.trainer import WhisperLoRATrainer
|
| 20 |
+
|
| 21 |
+
if __name__ == "__main__":
|
| 22 |
+
trainer = WhisperLoRATrainer(
|
| 23 |
+
base_config_path="configs/base_config.yaml",
|
| 24 |
+
language_config_path="configs/lora_bambara.yaml",
|
| 25 |
+
)
|
| 26 |
+
trainer.setup()
|
| 27 |
+
trainer.train()
|
| 28 |
+
print("\nBambara training complete. Adapter saved to adapters/bambara/")
|
scripts/train_fula.py
ADDED
|
@@ -0,0 +1,29 @@
|
| 1 |
+
"""
|
| 2 |
+
Phase 3b: Fine-tune LoRA adapter for Fula (ful).
|
| 3 |
+
Trains on the same frozen backbone as Bambara — base model weights are NOT modified.
|
| 4 |
+
|
| 5 |
+
Usage:
|
| 6 |
+
python scripts/train_fula.py
|
| 7 |
+
"""
|
| 8 |
+
import logging
|
| 9 |
+
import sys
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 13 |
+
|
| 14 |
+
from dotenv import load_dotenv
|
| 15 |
+
|
| 16 |
+
load_dotenv()
|
| 17 |
+
|
| 18 |
+
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s — %(message)s")
|
| 19 |
+
|
| 20 |
+
from src.training.trainer import WhisperLoRATrainer
|
| 21 |
+
|
| 22 |
+
if __name__ == "__main__":
|
| 23 |
+
trainer = WhisperLoRATrainer(
|
| 24 |
+
base_config_path="configs/base_config.yaml",
|
| 25 |
+
language_config_path="configs/lora_fula.yaml",
|
| 26 |
+
)
|
| 27 |
+
trainer.setup()
|
| 28 |
+
trainer.train()
|
| 29 |
+
print("\nFula training complete. Adapter saved to adapters/fula/")
|
scripts/verify_baseline.py
ADDED
|
@@ -0,0 +1,78 @@
|
| 1 |
+
"""
|
| 2 |
+
Phase 1 smoke test: load Whisper, run inference on a sample audio clip.
|
| 3 |
+
Prints model info, inference time, GPU memory usage, and sample transcript.
|
| 4 |
+
|
| 5 |
+
Usage:
|
| 6 |
+
python scripts/verify_baseline.py
|
| 7 |
+
"""
|
| 8 |
+
import sys
|
| 9 |
+
import time
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
# Allow imports from project root
|
| 13 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 14 |
+
|
| 15 |
+
import numpy as np
|
| 16 |
+
import torch
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def main() -> None:
|
| 20 |
+
from src.engine.whisper_base import WhisperBackbone
|
| 21 |
+
|
| 22 |
+
print("=" * 60)
|
| 23 |
+
print("Sahel-Agri Voice AI — Baseline Verification")
|
| 24 |
+
print("=" * 60)
|
| 25 |
+
|
| 26 |
+
# 1. Check environment
|
| 27 |
+
print(f"\nPython: {sys.version.split()[0]}")
|
| 28 |
+
print(f"PyTorch: {torch.__version__}")
|
| 29 |
+
print(f"CUDA available: {torch.cuda.is_available()}")
|
| 30 |
+
if torch.cuda.is_available():
|
| 31 |
+
print(f"GPU: {torch.cuda.get_device_name(0)}")
|
| 32 |
+
print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
|
| 33 |
+
|
| 34 |
+
# 2. Load model
|
| 35 |
+
print("\n[1/3] Loading backbone model...")
|
| 36 |
+
t0 = time.time()
|
| 37 |
+
backbone = WhisperBackbone("configs/base_config.yaml")
|
| 38 |
+
backbone.load(device="cuda" if torch.cuda.is_available() else "cpu")
|
| 39 |
+
load_time = time.time() - t0
|
| 40 |
+
print(f" Loaded in {load_time:.1f}s")
|
| 41 |
+
|
| 42 |
+
if torch.cuda.is_available():
|
| 43 |
+
used = torch.cuda.memory_allocated() / 1e9
|
| 44 |
+
reserved = torch.cuda.memory_reserved() / 1e9
|
| 45 |
+
print(f" GPU memory: {used:.2f} GB allocated / {reserved:.2f} GB reserved")
|
| 46 |
+
|
| 47 |
+
# 3. Generate synthetic test audio (1 second of low-amplitude white noise)
|
| 48 |
+
print("\n[2/3] Generating test audio (1s white noise)...")
|
| 49 |
+
sample_rate = 16000
|
| 50 |
+
duration = 1.0
|
| 51 |
+
audio = np.random.randn(int(sample_rate * duration)).astype(np.float32) * 0.01
|
| 52 |
+
|
| 53 |
+
# 4. Run inference
|
| 54 |
+
print("[3/3] Running inference...")
|
| 55 |
+
processor = backbone.processor
|
| 56 |
+
model = backbone.model
|
| 57 |
+
|
| 58 |
+
inputs = processor(audio, sampling_rate=sample_rate, return_tensors="pt")
|
| 59 |
+
input_features = inputs.input_features.to(backbone.device)
|
| 60 |
+
if backbone.device == "cuda":
|
| 61 |
+
input_features = input_features.half()
|
| 62 |
+
|
| 63 |
+
t0 = time.time()
|
| 64 |
+
with torch.no_grad():
|
| 65 |
+
predicted_ids = model.generate(input_features, max_new_tokens=50)
|
| 66 |
+
infer_time = time.time() - t0
|
| 67 |
+
|
| 68 |
+
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
|
| 69 |
+
|
| 70 |
+
print(f"\n{'=' * 60}")
|
| 71 |
+
print(f"Transcript: '{transcription}' (noise input — blank expected)")
|
| 72 |
+
print(f"Inference time: {infer_time * 1000:.0f} ms")
|
| 73 |
+
print(f"\nBaseline verification PASSED.")
|
| 74 |
+
print(f"{'=' * 60}")
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
if __name__ == "__main__":
|
| 78 |
+
main()
|
src/__init__.py
ADDED
|
File without changes
|
src/api/__init__.py
ADDED
|
File without changes
|
src/api/app.py
ADDED
|
@@ -0,0 +1,98 @@
|
| 1 |
+
"""
|
| 2 |
+
FastAPI application factory.
|
| 3 |
+
Uses lifespan context manager to load the Whisper model at startup
|
| 4 |
+
and register language adapters — keeping a single backbone in GPU memory.
|
| 5 |
+
"""
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
import logging
|
| 9 |
+
import os
|
| 10 |
+
from contextlib import asynccontextmanager
|
| 11 |
+
|
| 12 |
+
import yaml
|
| 13 |
+
from fastapi import FastAPI
|
| 14 |
+
|
| 15 |
+
from src.api.middleware import register_middleware
|
| 16 |
+
from src.api.routes import health, iot, transcribe
|
| 17 |
+
from src.engine.adapter_manager import AdapterManager
|
| 18 |
+
from src.engine.transcriber import Transcriber
|
| 19 |
+
from src.engine.whisper_base import WhisperBackbone
|
| 20 |
+
from src.iot.sensor_bridge import SensorBridge
|
| 21 |
+
|
| 22 |
+
logger = logging.getLogger(__name__)
|
| 23 |
+
|
| 24 |
+
logging.basicConfig(
|
| 25 |
+
level=os.getenv("LOG_LEVEL", "INFO"),
|
| 26 |
+
format="%(asctime)s %(levelname)s %(name)s — %(message)s",
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
@asynccontextmanager
|
| 31 |
+
async def lifespan(app: FastAPI):
|
| 32 |
+
"""Load model at startup, free GPU memory at shutdown."""
|
| 33 |
+
with open("configs/base_config.yaml") as f:
|
| 34 |
+
config = yaml.safe_load(f)
|
| 35 |
+
|
| 36 |
+
hf_token = os.getenv("HF_TOKEN")
|
| 37 |
+
device = os.getenv("DEVICE", "cuda")
|
| 38 |
+
bambara_path = os.getenv("BAMBARA_ADAPTER_PATH", "./adapters/bambara")
|
| 39 |
+
fula_path = os.getenv("FULA_ADAPTER_PATH", "./adapters/fula")
|
| 40 |
+
sensor_api_url = os.getenv("SENSOR_API_URL") or None
|
| 41 |
+
|
| 42 |
+
# 1. Load backbone
|
| 43 |
+
logger.info("Loading Whisper backbone...")
|
| 44 |
+
backbone = WhisperBackbone("configs/base_config.yaml")
|
| 45 |
+
backbone.load(device=device, hf_token=hf_token)
|
| 46 |
+
|
| 47 |
+
# 2. Register adapters (they are loaded on first use via activate())
|
| 48 |
+
adapter_manager = AdapterManager(backbone.model, config)
|
| 49 |
+
adapter_manager.register("bam", bambara_path)
|
| 50 |
+
adapter_manager.register("ful", fula_path)
|
| 51 |
+
|
| 52 |
+
# 3. Pre-load the default adapter to warm up VRAM
|
| 53 |
+
try:
|
| 54 |
+
adapter_manager.load_adapter("bam")
|
| 55 |
+
logger.info("Default adapter 'bam' pre-loaded.")
|
| 56 |
+
except Exception as e:
|
| 57 |
+
logger.warning("Could not pre-load 'bam' adapter: %s", e)
|
| 58 |
+
|
| 59 |
+
# 4. Create transcriber and sensor bridge
|
| 60 |
+
transcriber = Transcriber(backbone, adapter_manager)
|
| 61 |
+
sensor_bridge = SensorBridge(sensor_api_url=sensor_api_url)
|
| 62 |
+
|
| 63 |
+
# 5. Attach to app.state for dependency injection
|
| 64 |
+
app.state.backbone = backbone
|
| 65 |
+
app.state.adapter_manager = adapter_manager
|
| 66 |
+
app.state.transcriber = transcriber
|
| 67 |
+
app.state.sensor_bridge = sensor_bridge
|
| 68 |
+
|
| 69 |
+
logger.info("Sahel-Agri Voice AI server ready.")
|
| 70 |
+
yield
|
| 71 |
+
|
| 72 |
+
# Shutdown
|
| 73 |
+
logger.info("Shutting down — freeing GPU memory...")
|
| 74 |
+
backbone.free()
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def create_app() -> FastAPI:
|
| 78 |
+
app = FastAPI(
|
| 79 |
+
title="Sahel-Agri Voice AI",
|
| 80 |
+
description=(
|
| 81 |
+
"Modular STT engine for Bambara and Fula — serving Mali and Guinea farmers "
|
| 82 |
+
"via voice-first agricultural intelligence."
|
| 83 |
+
),
|
| 84 |
+
version="0.1.0",
|
| 85 |
+
lifespan=lifespan,
|
| 86 |
+
)
|
| 87 |
+
|
| 88 |
+
register_middleware(app)
|
| 89 |
+
|
| 90 |
+
# Register routes
|
| 91 |
+
app.include_router(health.router, prefix="/api/v1", tags=["health"])
|
| 92 |
+
app.include_router(transcribe.router, prefix="/api/v1", tags=["transcribe"])
|
| 93 |
+
app.include_router(iot.router, prefix="/api/v1", tags=["iot"])
|
| 94 |
+
|
| 95 |
+
return app
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
app = create_app()
|
src/api/dependencies.py
ADDED
|
@@ -0,0 +1,20 @@
|
| 1 |
+
"""FastAPI dependency injection: retrieves shared model objects from app.state."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
from fastapi import Request
|
| 5 |
+
|
| 6 |
+
from src.engine.adapter_manager import AdapterManager
|
| 7 |
+
from src.engine.transcriber import Transcriber
|
| 8 |
+
from src.iot.sensor_bridge import SensorBridge
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def get_transcriber(request: Request) -> Transcriber:
|
| 12 |
+
return request.app.state.transcriber
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def get_adapter_manager(request: Request) -> AdapterManager:
|
| 16 |
+
return request.app.state.adapter_manager
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def get_sensor_bridge(request: Request) -> SensorBridge:
|
| 20 |
+
return request.app.state.sensor_bridge
|
src/api/middleware.py
ADDED
|
@@ -0,0 +1,47 @@
|
| 1 |
+
"""CORS, structured request logging, and rate-limit middleware."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import logging
|
| 5 |
+
import time
|
| 6 |
+
import uuid
|
| 7 |
+
|
| 8 |
+
from fastapi import FastAPI, Request, Response
|
| 9 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 10 |
+
from slowapi import Limiter, _rate_limit_exceeded_handler
|
| 11 |
+
from slowapi.errors import RateLimitExceeded
|
| 12 |
+
from slowapi.util import get_remote_address
|
| 13 |
+
|
| 14 |
+
logger = logging.getLogger(__name__)
|
| 15 |
+
|
| 16 |
+
limiter = Limiter(key_func=get_remote_address, default_limits=["60/minute"])
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def register_middleware(app: FastAPI) -> None:
|
| 20 |
+
"""Attach all middleware to the FastAPI app."""
|
| 21 |
+
|
| 22 |
+
# CORS — allow WhatsApp webhook domain and local development
|
| 23 |
+
app.add_middleware(
|
| 24 |
+
CORSMiddleware,
|
| 25 |
+
allow_origins=["*"], # Tighten in production with specific domains
|
| 26 |
+
allow_credentials=True,
|
| 27 |
+
allow_methods=["GET", "POST"],
|
| 28 |
+
allow_headers=["*"],
|
| 29 |
+
)
|
| 30 |
+
|
| 31 |
+
# Rate limiting
|
| 32 |
+
app.state.limiter = limiter
|
| 33 |
+
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
|
| 34 |
+
|
| 35 |
+
@app.middleware("http")
|
| 36 |
+
async def logging_middleware(request: Request, call_next) -> Response:
|
| 37 |
+
request_id = str(uuid.uuid4())[:8]
|
| 38 |
+
t0 = time.perf_counter()
|
| 39 |
+
response = await call_next(request)
|
| 40 |
+
elapsed_ms = int((time.perf_counter() - t0) * 1000)
|
| 41 |
+
logger.info(
|
| 42 |
+
"req_id=%s method=%s path=%s status=%d latency_ms=%d",
|
| 43 |
+
request_id, request.method, request.url.path,
|
| 44 |
+
response.status_code, elapsed_ms,
|
| 45 |
+
)
|
| 46 |
+
response.headers["X-Request-ID"] = request_id
|
| 47 |
+
return response
|
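One wiring detail worth flagging: the Limiter above declares default_limits and the 429 handler is registered, but slowapi only enforces those defaults once its ASGI middleware is added (or individual routes are decorated with @limiter.limit). A sketch of the missing registration, assuming slowapi's bundled SlowAPIMiddleware; treat it as a suggestion rather than part of this commit:

    from fastapi import FastAPI
    from slowapi.middleware import SlowAPIMiddleware

    def register_rate_limiting(app: FastAPI) -> None:
        # Requires app.state.limiter to be set first (register_middleware above does this)
        app.add_middleware(SlowAPIMiddleware)  # enforces default_limits=["60/minute"]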
src/api/routes/__init__.py
ADDED
|
File without changes
|
src/api/routes/health.py
ADDED
|
@@ -0,0 +1,25 @@
|
| 1 |
+
"""GET /api/v1/health — model status and adapter availability."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
from fastapi import APIRouter, Depends, Request
|
| 5 |
+
|
| 6 |
+
from src.api.dependencies import get_adapter_manager
|
| 7 |
+
from src.api.schemas import HealthResponse
|
| 8 |
+
from src.engine.adapter_manager import AdapterManager
|
| 9 |
+
|
| 10 |
+
router = APIRouter()
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@router.get("/health", response_model=HealthResponse)
|
| 14 |
+
async def health_check(
|
| 15 |
+
request: Request,
|
| 16 |
+
adapter_manager: AdapterManager = Depends(get_adapter_manager),
|
| 17 |
+
) -> HealthResponse:
|
| 18 |
+
model_loaded = hasattr(request.app.state, "transcriber")
|
| 19 |
+
return HealthResponse(
|
| 20 |
+
status="ok" if model_loaded else "loading",
|
| 21 |
+
model_loaded=model_loaded,
|
| 22 |
+
active_adapter=adapter_manager.get_active(),
|
| 23 |
+
adapters_available=adapter_manager.list_available(),
|
| 24 |
+
adapters_loaded=adapter_manager.list_loaded(),
|
| 25 |
+
)
|
src/api/routes/iot.py
ADDED
|
@@ -0,0 +1,90 @@
|
| 1 |
+
"""POST /api/v1/query — full pipeline: audio → transcription → intent → sensor → voice response."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import logging
|
| 5 |
+
import os
|
| 6 |
+
import tempfile
|
| 7 |
+
import time
|
| 8 |
+
from typing import Annotated, Optional
|
| 9 |
+
|
| 10 |
+
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
|
| 11 |
+
|
| 12 |
+
from src.api.dependencies import get_sensor_bridge, get_transcriber
|
| 13 |
+
from src.api.schemas import IoTQueryResponse
|
| 14 |
+
from src.engine.transcriber import Transcriber
|
| 15 |
+
from src.iot.intent_parser import IntentParser
|
| 16 |
+
from src.iot.sensor_bridge import SensorBridge
|
| 17 |
+
from src.iot.voice_responder import VoiceResponder
|
| 18 |
+
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
router = APIRouter()
|
| 21 |
+
|
| 22 |
+
_intent_parser = IntentParser()
|
| 23 |
+
_voice_responder = VoiceResponder(language="fr")
|
| 24 |
+
|
| 25 |
+
SUPPORTED_LANGUAGES = {"bam", "ful"}
|
| 26 |
+
MAX_AUDIO_BYTES = 10 * 1024 * 1024
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@router.post("/query", response_model=IoTQueryResponse)
|
| 30 |
+
async def agricultural_query(
|
| 31 |
+
audio_file: Annotated[UploadFile, File(description="Audio file with farmer's voice query")],
|
| 32 |
+
language: Annotated[str, Form(description="Language code: 'bam' or 'ful'")] = "bam",
|
| 33 |
+
field_id: Annotated[Optional[str], Form(description="Field/location ID for sensor lookup")] = None,
|
| 34 |
+
transcriber: Transcriber = Depends(get_transcriber),
|
| 35 |
+
sensor_bridge: SensorBridge = Depends(get_sensor_bridge),
|
| 36 |
+
) -> IoTQueryResponse:
|
| 37 |
+
t0 = time.perf_counter()
|
| 38 |
+
|
| 39 |
+
if language not in SUPPORTED_LANGUAGES:
|
| 40 |
+
raise HTTPException(
|
| 41 |
+
status_code=422,
|
| 42 |
+
detail=f"Unsupported language '{language}'. Supported: {sorted(SUPPORTED_LANGUAGES)}",
|
| 43 |
+
)
|
| 44 |
+
|
| 45 |
+
audio_bytes = await audio_file.read()
|
| 46 |
+
if len(audio_bytes) > MAX_AUDIO_BYTES:
|
| 47 |
+
raise HTTPException(status_code=413, detail="Audio file too large. Max 10 MB.")
|
| 48 |
+
|
| 49 |
+
ext = os.path.splitext(audio_file.filename or "audio.wav")[1].lower() or ".wav"
|
| 50 |
+
tmp_path = None
|
| 51 |
+
try:
|
| 52 |
+
# Step 1: Transcribe
|
| 53 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
|
| 54 |
+
tmp.write(audio_bytes)
|
| 55 |
+
tmp_path = tmp.name
|
| 56 |
+
|
| 57 |
+
transcription_result = transcriber.transcribe_file(tmp_path, language)
|
| 58 |
+
|
| 59 |
+
# Step 2: Parse intent
|
| 60 |
+
intent = _intent_parser.parse(transcription_result.text, language)
|
| 61 |
+
|
| 62 |
+
# Step 3: Fetch sensor data
|
| 63 |
+
sensor_data = await sensor_bridge.fetch(intent, field_id=field_id)
|
| 64 |
+
|
| 65 |
+
# Step 4: Generate voice response
|
| 66 |
+
voice_response = _voice_responder.generate_response(intent, sensor_data)
|
| 67 |
+
|
| 68 |
+
except HTTPException:
|
| 69 |
+
raise
|
| 70 |
+
except Exception as e:
|
| 71 |
+
logger.error("IoT query failed: %s", e, exc_info=True)
|
| 72 |
+
raise HTTPException(status_code=500, detail=str(e))
|
| 73 |
+
finally:
|
| 74 |
+
if tmp_path and os.path.exists(tmp_path):
|
| 75 |
+
os.unlink(tmp_path)
|
| 76 |
+
|
| 77 |
+
elapsed_ms = int((time.perf_counter() - t0) * 1000)
|
| 78 |
+
|
| 79 |
+
return IoTQueryResponse(
|
| 80 |
+
transcription=transcription_result.text,
|
| 81 |
+
language=language,
|
| 82 |
+
intent={
|
| 83 |
+
"action": intent.action,
|
| 84 |
+
"entity": intent.entity,
|
| 85 |
+
"confidence": intent.confidence,
|
| 86 |
+
},
|
| 87 |
+
sensor_data=sensor_data.values,
|
| 88 |
+
voice_response=voice_response,
|
| 89 |
+
processing_time_ms=elapsed_ms,
|
| 90 |
+
)
|
src/api/routes/transcribe.py
ADDED
|
@@ -0,0 +1,74 @@
|
| 1 |
+
"""POST /api/v1/transcribe — convert uploaded audio to text."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import logging
|
| 5 |
+
import os
|
| 6 |
+
import tempfile
|
| 7 |
+
from typing import Annotated
|
| 8 |
+
|
| 9 |
+
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
|
| 10 |
+
|
| 11 |
+
from src.api.dependencies import get_transcriber
|
| 12 |
+
from src.api.schemas import TranscribeResponse
|
| 13 |
+
from src.engine.transcriber import Transcriber
|
| 14 |
+
|
| 15 |
+
logger = logging.getLogger(__name__)
|
| 16 |
+
router = APIRouter()
|
| 17 |
+
|
| 18 |
+
SUPPORTED_LANGUAGES = {"bam", "ful"}
|
| 19 |
+
SUPPORTED_EXTENSIONS = {".wav", ".mp3", ".ogg", ".m4a", ".flac", ".webm"}
|
| 20 |
+
MAX_AUDIO_BYTES = 10 * 1024 * 1024 # 10 MB
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@router.post("/transcribe", response_model=TranscribeResponse)
|
| 24 |
+
async def transcribe_audio(
|
| 25 |
+
audio_file: Annotated[UploadFile, File(description="Audio file (wav/mp3/ogg/m4a/flac/webm)")],
|
| 26 |
+
language: Annotated[str, Form(description="Language code: 'bam' (Bambara) or 'ful' (Fula)")] = "bam",
|
| 27 |
+
transcriber: Transcriber = Depends(get_transcriber),
|
| 28 |
+
) -> TranscribeResponse:
|
| 29 |
+
# Validate language
|
| 30 |
+
if language not in SUPPORTED_LANGUAGES:
|
| 31 |
+
raise HTTPException(
|
| 32 |
+
status_code=422,
|
| 33 |
+
detail=f"Unsupported language '{language}'. Supported: {sorted(SUPPORTED_LANGUAGES)}",
|
| 34 |
+
)
|
| 35 |
+
|
| 36 |
+
# Validate file extension
|
| 37 |
+
filename = audio_file.filename or "audio.wav"
|
| 38 |
+
ext = os.path.splitext(filename)[1].lower()
|
| 39 |
+
if ext not in SUPPORTED_EXTENSIONS:
|
| 40 |
+
raise HTTPException(
|
| 41 |
+
status_code=422,
|
| 42 |
+
detail=f"Unsupported file type '{ext}'. Supported: {sorted(SUPPORTED_EXTENSIONS)}",
|
| 43 |
+
)
|
| 44 |
+
|
| 45 |
+
# Read and size-check
|
| 46 |
+
audio_bytes = await audio_file.read()
|
| 47 |
+
if len(audio_bytes) > MAX_AUDIO_BYTES:
|
| 48 |
+
raise HTTPException(
|
| 49 |
+
status_code=413,
|
| 50 |
+
detail=f"File too large ({len(audio_bytes) / 1e6:.1f} MB). Max 10 MB.",
|
| 51 |
+
)
|
| 52 |
+
|
| 53 |
+
# Windows-safe temp file: delete=False + manual unlink in finally
|
| 54 |
+
tmp_path = None
|
| 55 |
+
try:
|
| 56 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
|
| 57 |
+
tmp.write(audio_bytes)
|
| 58 |
+
tmp_path = tmp.name
|
| 59 |
+
|
| 60 |
+
result = transcriber.transcribe_file(tmp_path, language)
|
| 61 |
+
except Exception as e:
|
| 62 |
+
logger.error("Transcription failed: %s", e, exc_info=True)
|
| 63 |
+
raise HTTPException(status_code=500, detail=str(e))
|
| 64 |
+
finally:
|
| 65 |
+
if tmp_path and os.path.exists(tmp_path):
|
| 66 |
+
os.unlink(tmp_path)
|
| 67 |
+
|
| 68 |
+
return TranscribeResponse(
|
| 69 |
+
text=result.text,
|
| 70 |
+
language=result.language,
|
| 71 |
+
duration_s=result.duration_s,
|
| 72 |
+
processing_time_ms=result.processing_time_ms,
|
| 73 |
+
confidence=result.confidence,
|
| 74 |
+
)
|
src/api/schemas.py
ADDED
|
@@ -0,0 +1,36 @@
|
| 1 |
+
"""Pydantic v2 request and response models for all API endpoints."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
from typing import Optional
|
| 5 |
+
|
| 6 |
+
from pydantic import BaseModel, Field
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class TranscribeResponse(BaseModel):
|
| 10 |
+
text: str
|
| 11 |
+
language: str
|
| 12 |
+
duration_s: float
|
| 13 |
+
processing_time_ms: int
|
| 14 |
+
confidence: Optional[float] = None
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class IoTQueryResponse(BaseModel):
|
| 18 |
+
transcription: str
|
| 19 |
+
language: str
|
| 20 |
+
intent: dict
|
| 21 |
+
sensor_data: dict
|
| 22 |
+
voice_response: str
|
| 23 |
+
processing_time_ms: int
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class HealthResponse(BaseModel):
|
| 27 |
+
status: str
|
| 28 |
+
model_loaded: bool
|
| 29 |
+
active_adapter: Optional[str]
|
| 30 |
+
adapters_available: list[str]
|
| 31 |
+
adapters_loaded: list[str]
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class ErrorResponse(BaseModel):
|
| 35 |
+
error: str
|
| 36 |
+
detail: str
|
src/data/__init__.py
ADDED
|
File without changes
|
src/data/agri_dictionary.py
ADDED
|
@@ -0,0 +1,92 @@
|
| 1 |
+
"""
|
| 2 |
+
Agricultural vocabulary for Bambara and Fula.
|
| 3 |
+
Used to bias the Whisper decoder toward domain-specific terms via decoder prompt injection.
|
| 4 |
+
"""
|
| 5 |
+
from __future__ import annotations
|
| 6 |
+
|
| 7 |
+
from typing import TYPE_CHECKING
|
| 8 |
+
|
| 9 |
+
import torch
|
| 10 |
+
|
| 11 |
+
if TYPE_CHECKING:
|
| 12 |
+
from transformers import WhisperProcessor
|
| 13 |
+
|
| 14 |
+
# Bambara (bam) agricultural vocabulary
|
| 15 |
+
BAMBARA_VOCAB: dict[str, str] = {
|
| 16 |
+
"sɛnɛ": "farming",
|
| 17 |
+
"jiriw": "trees",
|
| 18 |
+
"nɔgɔ": "soil",
|
| 19 |
+
"sani": "fertilizer",
|
| 20 |
+
"kogomali": "groundnut",
|
| 21 |
+
"kaba": "corn/maize",
|
| 22 |
+
"tiga": "peanut",
|
| 23 |
+
"ji": "water",
|
| 24 |
+
"sanji": "rain",
|
| 25 |
+
"teliman": "weather",
|
| 26 |
+
"suruku": "pest/predator",
|
| 27 |
+
"bunding": "soil/earth",
|
| 28 |
+
"sira": "path/way",
|
| 29 |
+
"foro": "field",
|
| 30 |
+
"dugu": "village/land",
|
| 31 |
+
"dibi": "darkness/shade",
|
| 32 |
+
"fanga": "strength/fertilizer",
|
| 33 |
+
"kungoloni": "insects/pests",
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
# Fula (ful / Fulfulde) agricultural vocabulary
|
| 37 |
+
FULA_VOCAB: dict[str, str] = {
|
| 38 |
+
"ngesa": "field",
|
| 39 |
+
"leydi": "land/soil",
|
| 40 |
+
"kosam": "milk",
|
| 41 |
+
"nagge": "cattle",
|
| 42 |
+
"leeɗe": "crops",
|
| 43 |
+
"ndiyam": "water",
|
| 44 |
+
"yeeso": "wind/weather",
|
| 45 |
+
"laabi": "road/way",
|
| 46 |
+
"demoore": "farming",
|
| 47 |
+
"hoore": "head/top",
|
| 48 |
+
"biñ-biñ": "insects/pests",
|
| 49 |
+
"fuɗorde": "sunrise/east field",
|
| 50 |
+
"ngaari": "bull",
|
| 51 |
+
"mbabba": "donkey",
|
| 52 |
+
"ladde": "bush/forest",
|
| 53 |
+
"wutte": "clothing/harvest",
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
LANGUAGE_VOCABS: dict[str, dict[str, str]] = {
|
| 57 |
+
"bam": BAMBARA_VOCAB,
|
| 58 |
+
"ful": FULA_VOCAB,
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
class AgriculturalDictionary:
|
| 63 |
+
"""Converts agricultural vocabulary into decoder prompt token IDs for Whisper."""
|
| 64 |
+
|
| 65 |
+
def get_vocab(self, language: str) -> dict[str, str]:
|
| 66 |
+
if language not in LANGUAGE_VOCABS:
|
| 67 |
+
raise ValueError(f"No vocabulary for language '{language}'. Available: {list(LANGUAGE_VOCABS)}")
|
| 68 |
+
return LANGUAGE_VOCABS[language]
|
| 69 |
+
|
| 70 |
+
def get_prompt_text(self, language: str) -> str:
|
| 71 |
+
"""Return a comma-joined string of all terms, used as decoder text prompt."""
|
| 72 |
+
vocab = self.get_vocab(language)
|
| 73 |
+
return ", ".join(vocab.keys())
|
| 74 |
+
|
| 75 |
+
def build_prompt_ids(self, processor: "WhisperProcessor", language: str) -> torch.Tensor:
|
| 76 |
+
"""
|
| 77 |
+
Tokenize the vocabulary as a decoder prompt.
|
| 78 |
+
Pass this as `decoder_input_ids` or `prompt_ids` to model.generate()
|
| 79 |
+
to bias decoding toward known agricultural terms.
|
| 80 |
+
"""
|
| 81 |
+
prompt_text = self.get_prompt_text(language)
|
| 82 |
+
token_ids = processor.tokenizer(
|
| 83 |
+
prompt_text,
|
| 84 |
+
return_tensors="pt",
|
| 85 |
+
add_special_tokens=False,
|
| 86 |
+
).input_ids
|
| 87 |
+
return token_ids # shape: (1, N)
|
| 88 |
+
|
| 89 |
+
def get_token_ids(self, processor: "WhisperProcessor", language: str) -> list[int]:
|
| 90 |
+
"""Return flat list of token IDs for all vocabulary terms."""
|
| 91 |
+
ids = self.build_prompt_ids(processor, language)
|
| 92 |
+
return ids[0].tolist()
|
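A usage sketch for the dictionary above, following the build_prompt_ids docstring's suggestion of biasing generation through Whisper's prompt mechanism. Note that processor.get_prompt_ids() also prepends Whisper's start-of-prev token, which the raw tokenization in build_prompt_ids() does not; the variable names below are assumptions:

    import torch
    from src.data.agri_dictionary import AgriculturalDictionary

    agri = AgriculturalDictionary()

    def transcribe_with_vocab_bias(model, processor, input_features, language="bam"):
        """Nudge decoding toward known agricultural spellings via a decoder prompt."""
        prompt_ids = processor.get_prompt_ids(agri.get_prompt_text(language), return_tensors="pt")
        with torch.no_grad():
            ids = model.generate(input_features, prompt_ids=prompt_ids.to(input_features.device))
        return processor.batch_decode(ids, skip_special_tokens=True)[0]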
src/data/augmentation.py
ADDED
|
@@ -0,0 +1,84 @@
|
| 1 |
+
"""
|
| 2 |
+
Field noise augmentation for West African farm environments.
|
| 3 |
+
Mixes clean speech with tractor, wind, and livestock audio samples.
|
| 4 |
+
Degrades gracefully to Gaussian noise when no .wav files are present.
|
| 5 |
+
"""
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
import logging
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
import numpy as np
|
| 12 |
+
|
| 13 |
+
logger = logging.getLogger(__name__)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class FieldNoiseAugmenter:
|
| 17 |
+
"""
|
| 18 |
+
Applies audiomentations transforms that simulate noisy field conditions.
|
| 19 |
+
If the noise_dir has no .wav files, falls back to Gaussian noise only.
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
def __init__(self, noise_dir: str, config: dict) -> None:
|
| 23 |
+
self.noise_dir = Path(noise_dir)
|
| 24 |
+
self.config = config
|
| 25 |
+
self._compose = None
|
| 26 |
+
self._gaussian_only = False
|
| 27 |
+
self._build_pipeline()
|
| 28 |
+
|
| 29 |
+
def _build_pipeline(self) -> None:
|
| 30 |
+
try:
|
| 31 |
+
from audiomentations import (
|
| 32 |
+
AddBackgroundNoise,
|
| 33 |
+
AddGaussianNoise,
|
| 34 |
+
Compose,
|
| 35 |
+
RoomSimulator,
|
| 36 |
+
TimeStretch,
|
| 37 |
+
)
|
| 38 |
+
except ImportError:
|
| 39 |
+
logger.warning("audiomentations not installed — augmentation disabled.")
|
| 40 |
+
self._compose = None
|
| 41 |
+
return
|
| 42 |
+
|
| 43 |
+
snr_range = self.config.get("audio", {}).get("noise_snr_db_range", [5, 20])
|
| 44 |
+
prob = self.config.get("audio", {}).get("augmentation_prob", 0.6)
|
| 45 |
+
|
| 46 |
+
wav_files = list(self.noise_dir.glob("*.wav")) if self.noise_dir.exists() else []
|
| 47 |
+
|
| 48 |
+
transforms = []
|
| 49 |
+
|
| 50 |
+
if wav_files:
|
| 51 |
+
transforms.append(
|
| 52 |
+
AddBackgroundNoise(
|
| 53 |
+
sounds_path=str(self.noise_dir),
|
| 54 |
+
min_snr_db=float(snr_range[0]),
|
| 55 |
+
max_snr_db=float(snr_range[1]),
|
| 56 |
+
p=prob,
|
| 57 |
+
)
|
| 58 |
+
)
|
| 59 |
+
logger.info("FieldNoiseAugmenter: loaded %d noise files from %s", len(wav_files), self.noise_dir)
|
| 60 |
+
else:
|
| 61 |
+
logger.warning(
|
| 62 |
+
"FieldNoiseAugmenter: no .wav files found in %s — using Gaussian noise only. "
|
| 63 |
+
"Populate noise_samples/ for realistic field augmentation.",
|
| 64 |
+
self.noise_dir,
|
| 65 |
+
)
|
| 66 |
+
self._gaussian_only = True
|
| 67 |
+
|
| 68 |
+
transforms += [
|
| 69 |
+
AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.3),
|
| 70 |
+
TimeStretch(min_rate=0.9, max_rate=1.1, leave_length_unchanged=True, p=0.2),
|
| 71 |
+
RoomSimulator(p=0.3),
|
| 72 |
+
]
|
| 73 |
+
|
| 74 |
+
self._compose = Compose(transforms)
|
| 75 |
+
|
| 76 |
+
def augment(self, audio: np.ndarray, sr: int) -> np.ndarray:
|
| 77 |
+
"""Apply augmentation pipeline to a float32 audio array."""
|
| 78 |
+
if self._compose is None:
|
| 79 |
+
return audio
|
| 80 |
+
return self._compose(samples=audio, sample_rate=sr)
|
| 81 |
+
|
| 82 |
+
def is_ready(self) -> bool:
|
| 83 |
+
"""Returns True if augmentation is available (even Gaussian-only)."""
|
| 84 |
+
return self._compose is not None
|
src/data/feature_extractor.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
| 1 |
+
"""
|
| 2 |
+
Log-mel spectrogram extraction, padding/truncation, and batch collation for Whisper.
|
| 3 |
+
"""
|
| 4 |
+
from __future__ import annotations
|
| 5 |
+
|
| 6 |
+
import logging
|
| 7 |
+
from dataclasses import dataclass
|
| 8 |
+
from typing import TYPE_CHECKING, Any
|
| 9 |
+
|
| 10 |
+
import numpy as np
|
| 11 |
+
import torch
|
| 12 |
+
import torchaudio
|
| 13 |
+
|
| 14 |
+
if TYPE_CHECKING:
|
| 15 |
+
from transformers import WhisperProcessor
|
| 16 |
+
|
| 17 |
+
logger = logging.getLogger(__name__)
|
| 18 |
+
|
| 19 |
+
TARGET_SR = 16_000
|
| 20 |
+
MEL_FRAMES = 3000 # 30 seconds at 100 frames/sec
|
| 21 |
+
N_MELS = 80  # pre-large-v3 default; large-v3 family checkpoints use 128 mel bins
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class AudioFeatureExtractor:
|
| 25 |
+
"""Wraps WhisperProcessor to extract and normalize audio features."""
|
| 26 |
+
|
| 27 |
+
def __init__(self, processor: "WhisperProcessor", config: dict) -> None:
|
| 28 |
+
self.processor = processor
|
| 29 |
+
self.sample_rate = config.get("audio", {}).get("sample_rate", TARGET_SR)
|
| 30 |
+
|
| 31 |
+
def extract(self, audio: np.ndarray, sr: int) -> torch.Tensor:
|
| 32 |
+
"""
|
| 33 |
+
Resample audio to 16kHz, extract log-mel features.
|
| 34 |
+
Returns a tensor of shape (n_mels, 3000); n_mels is 128 for large-v3 checkpoints and 80 for earlier Whisper sizes.
|
| 35 |
+
"""
|
| 36 |
+
if sr != TARGET_SR:
|
| 37 |
+
tensor = torch.from_numpy(audio).unsqueeze(0)
|
| 38 |
+
tensor = torchaudio.functional.resample(tensor, sr, TARGET_SR)
|
| 39 |
+
audio = tensor.squeeze(0).numpy()
|
| 40 |
+
|
| 41 |
+
inputs = self.processor.feature_extractor(
|
| 42 |
+
audio,
|
| 43 |
+
sampling_rate=TARGET_SR,
|
| 44 |
+
return_tensors="pt",
|
| 45 |
+
)
|
| 46 |
+
features = inputs.input_features[0]  # (n_mels, 3000)
|
| 47 |
+
return features
|
| 48 |
+
|
| 49 |
+
def pad_or_truncate(self, features: torch.Tensor) -> torch.Tensor:
|
| 50 |
+
"""Ensure features are exactly (80, 3000)."""
|
| 51 |
+
n_mels, t = features.shape
|
| 52 |
+
if t < MEL_FRAMES:
|
| 53 |
+
pad = torch.zeros(n_mels, MEL_FRAMES - t, dtype=features.dtype)
|
| 54 |
+
features = torch.cat([features, pad], dim=-1)
|
| 55 |
+
elif t > MEL_FRAMES:
|
| 56 |
+
features = features[:, :MEL_FRAMES]
|
| 57 |
+
return features
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
@dataclass
|
| 61 |
+
class DataCollatorSpeechSeq2SeqWithPadding:
|
| 62 |
+
"""
|
| 63 |
+
Pads input_features to uniform length and label sequences with -100
|
| 64 |
+
(so they are ignored in the cross-entropy loss).
|
| 65 |
+
Compatible with HuggingFace Seq2SeqTrainer.
|
| 66 |
+
"""
|
| 67 |
+
processor: Any
|
| 68 |
+
decoder_start_token_id: int
|
| 69 |
+
|
| 70 |
+
def __call__(self, features: list[dict]) -> dict[str, torch.Tensor]:
|
| 71 |
+
# Separate input_features and labels
|
| 72 |
+
input_features = [{"input_features": f["input_features"]} for f in features]
|
| 73 |
+
label_features = [{"input_ids": f["labels"]} for f in features]
|
| 74 |
+
|
| 75 |
+
# Pad input features (processor handles this)
|
| 76 |
+
batch = self.processor.feature_extractor.pad(input_features, return_tensors="pt")
|
| 77 |
+
|
| 78 |
+
# Pad labels
|
| 79 |
+
labels_batch = self.processor.tokenizer.pad(label_features, return_tensors="pt")
|
| 80 |
+
labels = labels_batch["input_ids"].masked_fill(
|
| 81 |
+
labels_batch.attention_mask.ne(1), -100
|
| 82 |
+
)
|
| 83 |
+
|
| 84 |
+
# Remove decoder start token if it was prepended
|
| 85 |
+
if (labels[:, 0] == self.decoder_start_token_id).all().item():
|
| 86 |
+
labels = labels[:, 1:]
|
| 87 |
+
|
| 88 |
+
batch["labels"] = labels
|
| 89 |
+
return batch
|
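Hedged sketch of how the collator above would be wired into a HuggingFace Seq2SeqTrainer; the model id is assumed to be the project's Whisper checkpoint and the trainer call is only indicated in a comment:

    from transformers import WhisperForConditionalGeneration, WhisperProcessor
    from src.data.feature_extractor import DataCollatorSpeechSeq2SeqWithPadding

    model_id = "openai/whisper-large-v3-turbo"
    processor = WhisperProcessor.from_pretrained(model_id)
    model = WhisperForConditionalGeneration.from_pretrained(model_id)

    collator = DataCollatorSpeechSeq2SeqWithPadding(
        processor=processor,
        decoder_start_token_id=model.config.decoder_start_token_id,
    )
    # Pass data_collator=collator to Seq2SeqTrainer alongside the preprocessed Waxal splits.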
src/data/waxal_loader.py
ADDED
|
@@ -0,0 +1,119 @@
|
| 1 |
+
"""
|
| 2 |
+
Loads and preprocesses the google/waxal dataset for Bambara (bam) and Fula (ful).
|
| 3 |
+
Uses streaming to avoid downloading the full corpus before training.
|
| 4 |
+
"""
|
| 5 |
+
from __future__ import annotations
|
| 6 |
+
|
| 7 |
+
import logging
|
| 8 |
+
from typing import TYPE_CHECKING, Callable, Iterator
|
| 9 |
+
|
| 10 |
+
import numpy as np
|
| 11 |
+
import torch
|
| 12 |
+
import torchaudio
|
| 13 |
+
from datasets import load_dataset
|
| 14 |
+
|
| 15 |
+
if TYPE_CHECKING:
|
| 16 |
+
from datasets import Dataset, IterableDataset
|
| 17 |
+
from transformers import WhisperProcessor
|
| 18 |
+
|
| 19 |
+
from src.data.augmentation import FieldNoiseAugmenter
|
| 20 |
+
|
| 21 |
+
logger = logging.getLogger(__name__)
|
| 22 |
+
|
| 23 |
+
# google/waxal column names
|
| 24 |
+
AUDIO_COL = "audio"
|
| 25 |
+
TEXT_COL = "transcription"
|
| 26 |
+
TARGET_SR = 16_000
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class WaxalDataLoader:
|
| 30 |
+
"""Streams the google/waxal dataset and prepares examples for Whisper training."""
|
| 31 |
+
|
| 32 |
+
def __init__(
|
| 33 |
+
self,
|
| 34 |
+
subset: str,
|
| 35 |
+
config: dict,
|
| 36 |
+
hf_token: str | None = None,
|
| 37 |
+
) -> None:
|
| 38 |
+
if subset not in ("bam", "ful"):
|
| 39 |
+
raise ValueError(f"subset must be 'bam' or 'ful', got '{subset}'")
|
| 40 |
+
self.subset = subset
|
| 41 |
+
self.config = config
|
| 42 |
+
self.hf_token = hf_token
|
| 43 |
+
|
| 44 |
+
def load_split(self, split: str = "train", streaming: bool = True) -> "IterableDataset | Dataset":
|
| 45 |
+
"""Return a single split of google/waxal."""
|
| 46 |
+
logger.info("Loading google/waxal subset=%s split=%s streaming=%s", self.subset, split, streaming)
|
| 47 |
+
ds = load_dataset(
|
| 48 |
+
"google/waxal",
|
| 49 |
+
self.subset,
|
| 50 |
+
split=split,
|
| 51 |
+
token=self.hf_token,
|
| 52 |
+
streaming=streaming,
|
| 53 |
+
trust_remote_code=True,
|
| 54 |
+
)
|
| 55 |
+
if streaming:
|
| 56 |
+
ds = ds.shuffle(seed=42, buffer_size=1000)
|
| 57 |
+
return ds
|
| 58 |
+
|
| 59 |
+
def get_splits(self, streaming: bool = True) -> dict[str, "IterableDataset | Dataset"]:
|
| 60 |
+
"""Return train / validation / test splits."""
|
| 61 |
+
splits = {}
|
| 62 |
+
for split in ("train", "validation", "test"):
|
| 63 |
+
try:
|
| 64 |
+
splits[split] = self.load_split(split, streaming=streaming)
|
| 65 |
+
except Exception:
|
| 66 |
+
logger.warning("Split '%s' not available for subset '%s'", split, self.subset)
|
| 67 |
+
return splits
|
| 68 |
+
|
| 69 |
+
def make_preprocess_fn(
|
| 70 |
+
self,
|
| 71 |
+
processor: "WhisperProcessor",
|
| 72 |
+
augmenter: "FieldNoiseAugmenter | None" = None,
|
| 73 |
+
) -> Callable[[dict], dict]:
|
| 74 |
+
"""Return a function that converts a raw Waxal example into model inputs."""
|
| 75 |
+
|
| 76 |
+
def preprocess(example: dict) -> dict:
|
| 77 |
+
# Extract and resample audio
|
| 78 |
+
audio_array = np.array(example[AUDIO_COL]["array"], dtype=np.float32)
|
| 79 |
+
orig_sr: int = example[AUDIO_COL]["sampling_rate"]
|
| 80 |
+
|
| 81 |
+
if orig_sr != TARGET_SR:
|
| 82 |
+
tensor = torch.from_numpy(audio_array).unsqueeze(0)
|
| 83 |
+
tensor = torchaudio.functional.resample(tensor, orig_sr, TARGET_SR)
|
| 84 |
+
audio_array = tensor.squeeze(0).numpy()
|
| 85 |
+
|
| 86 |
+
# Apply field noise augmentation if provided
|
| 87 |
+
if augmenter is not None and augmenter.is_ready():
|
| 88 |
+
audio_array = augmenter.augment(audio_array, TARGET_SR)
|
| 89 |
+
|
| 90 |
+
# Extract log-mel features
|
| 91 |
+
inputs = processor.feature_extractor(
|
| 92 |
+
audio_array,
|
| 93 |
+
sampling_rate=TARGET_SR,
|
| 94 |
+
return_tensors="np",
|
| 95 |
+
)
|
| 96 |
+
input_features = inputs.input_features[0]  # shape (n_mels, 3000)
|
| 97 |
+
|
| 98 |
+
# Tokenize transcript
|
| 99 |
+
text: str = example[TEXT_COL]
|
| 100 |
+
labels = processor.tokenizer(text, return_tensors="np").input_ids[0]
|
| 101 |
+
|
| 102 |
+
return {
|
| 103 |
+
"input_features": input_features,
|
| 104 |
+
"labels": labels,
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
return preprocess
|
| 108 |
+
|
| 109 |
+
def iter_processed(
|
| 110 |
+
self,
|
| 111 |
+
processor: "WhisperProcessor",
|
| 112 |
+
split: str = "train",
|
| 113 |
+
augmenter: "FieldNoiseAugmenter | None" = None,
|
| 114 |
+
) -> Iterator[dict]:
|
| 115 |
+
"""Yield preprocessed examples one at a time (streaming)."""
|
| 116 |
+
ds = self.load_split(split, streaming=True)
|
| 117 |
+
fn = self.make_preprocess_fn(processor, augmenter)
|
| 118 |
+
for example in ds:
|
| 119 |
+
yield fn(example)
|
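A short sketch of streaming preprocessed examples with the loader above (access to google/waxal is required; the token argument is left as a placeholder):

    from transformers import WhisperProcessor
    from src.data.waxal_loader import WaxalDataLoader

    processor = WhisperProcessor.from_pretrained("openai/whisper-large-v3-turbo")
    loader = WaxalDataLoader(subset="bam", config={}, hf_token=None)  # pass a HF read token here

    for example in loader.iter_processed(processor, split="train"):
        print(example["input_features"].shape, len(example["labels"]))
        break  # inspect a single streamed example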
src/engine/__init__.py
ADDED
|
File without changes
|
src/engine/adapter_manager.py
ADDED
|
@@ -0,0 +1,106 @@
|
| 1 |
+
"""
|
| 2 |
+
LoRA adapter hot-swap manager.
|
| 3 |
+
|
| 4 |
+
Uses PEFT's multi-adapter API:
|
| 5 |
+
- model.load_adapter(path, adapter_name=lang) — first load (~2s per adapter)
|
| 6 |
+
- model.set_adapter(lang) — subsequent swap (~50ms)
|
| 7 |
+
|
| 8 |
+
This keeps a single backbone in VRAM and swaps only the ~50MB adapter weights,
|
| 9 |
+
vs reloading the full 1.5GB model per language.
|
| 10 |
+
"""
|
| 11 |
+
from __future__ import annotations
|
| 12 |
+
|
| 13 |
+
import logging
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
from typing import TYPE_CHECKING
|
| 16 |
+
|
| 17 |
+
from peft import PeftModel
|
| 18 |
+
|
| 19 |
+
if TYPE_CHECKING:
|
| 20 |
+
from transformers import WhisperForConditionalGeneration
|
| 21 |
+
|
| 22 |
+
logger = logging.getLogger(__name__)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class AdapterManager:
|
| 26 |
+
"""Manages registration and hot-swapping of LoRA language adapters."""
|
| 27 |
+
|
| 28 |
+
def __init__(self, base_model: "WhisperForConditionalGeneration", config: dict) -> None:
|
| 29 |
+
self._base_model = base_model
|
| 30 |
+
self._config = config
|
| 31 |
+
self._registry: dict[str, str] = {} # language_code -> adapter_path
|
| 32 |
+
self._peft_model: PeftModel | None = None
|
| 33 |
+
self._active: str | None = None
|
| 34 |
+
|
| 35 |
+
def register(self, language: str, adapter_path: str) -> None:
|
| 36 |
+
"""Register an adapter path. Does not load it yet."""
|
| 37 |
+
path = Path(adapter_path)
|
| 38 |
+
if not path.exists():
|
| 39 |
+
logger.warning(
|
| 40 |
+
"Adapter path '%s' for language '%s' does not exist. "
|
| 41 |
+
"Run training first, or check the path.",
|
| 42 |
+
adapter_path, language,
|
| 43 |
+
)
|
| 44 |
+
self._registry[language] = str(path)
|
| 45 |
+
logger.info("Registered adapter '%s' → %s", language, adapter_path)
|
| 46 |
+
|
| 47 |
+
def load_adapter(self, language: str) -> None:
|
| 48 |
+
"""
|
| 49 |
+
Load an adapter into the model for the first time.
|
| 50 |
+
Slow (~2s): reads adapter weights from disk.
|
| 51 |
+
Subsequent activate() calls reuse the already-loaded weights.
|
| 52 |
+
"""
|
| 53 |
+
if language not in self._registry:
|
| 54 |
+
raise KeyError(f"No adapter registered for language '{language}'. "
|
| 55 |
+
f"Available: {list(self._registry)}")
|
| 56 |
+
|
| 57 |
+
adapter_path = self._registry[language]
|
| 58 |
+
|
| 59 |
+
if self._peft_model is None:
|
| 60 |
+
# First adapter: wrap the base model with PeftModel
|
| 61 |
+
logger.info("Wrapping base model with first adapter '%s'...", language)
|
| 62 |
+
self._peft_model = PeftModel.from_pretrained(
|
| 63 |
+
self._base_model,
|
| 64 |
+
adapter_path,
|
| 65 |
+
adapter_name=language,
|
| 66 |
+
)
|
| 67 |
+
else:
|
| 68 |
+
# Subsequent adapters: load into the existing PeftModel
|
| 69 |
+
logger.info("Loading adapter '%s' into existing PeftModel...", language)
|
| 70 |
+
self._peft_model.load_adapter(adapter_path, adapter_name=language)
|
| 71 |
+
|
| 72 |
+
self._active = language
|
| 73 |
+
logger.info("Adapter '%s' loaded and active.", language)
|
| 74 |
+
|
| 75 |
+
def activate(self, language: str) -> None:
|
| 76 |
+
"""
|
| 77 |
+
Hot-swap to a previously loaded adapter (~50ms).
|
| 78 |
+
Call load_adapter() first if this adapter hasn't been loaded.
|
| 79 |
+
"""
|
| 80 |
+
if self._peft_model is None:
|
| 81 |
+
self.load_adapter(language)
|
| 82 |
+
return
|
| 83 |
+
|
| 84 |
+
loaded = set(self._peft_model.peft_config.keys())
|
| 85 |
+
if language not in loaded:
|
| 86 |
+
self.load_adapter(language)
|
| 87 |
+
return
|
| 88 |
+
|
| 89 |
+
self._peft_model.set_adapter(language)
|
| 90 |
+
self._active = language
|
| 91 |
+
logger.debug("Hot-swapped to adapter '%s'.", language)
|
| 92 |
+
|
| 93 |
+
def get_model(self) -> "WhisperForConditionalGeneration | PeftModel":
|
| 94 |
+
"""Return the PeftModel (or base model if no adapter loaded yet)."""
|
| 95 |
+
return self._peft_model if self._peft_model is not None else self._base_model
|
| 96 |
+
|
| 97 |
+
def get_active(self) -> str | None:
|
| 98 |
+
return self._active
|
| 99 |
+
|
| 100 |
+
def list_available(self) -> list[str]:
|
| 101 |
+
return list(self._registry.keys())
|
| 102 |
+
|
| 103 |
+
def list_loaded(self) -> list[str]:
|
| 104 |
+
if self._peft_model is None:
|
| 105 |
+
return []
|
| 106 |
+
return list(self._peft_model.peft_config.keys())
|
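Hedged sketch of the register → activate flow described in the docstring above; the adapter paths are illustrative and assume training has already produced them:

    from src.engine.adapter_manager import AdapterManager
    from src.engine.whisper_base import WhisperBackbone

    backbone = WhisperBackbone("configs/base_config.yaml")
    backbone.load(device="cuda")

    manager = AdapterManager(backbone.model, config={})
    manager.register("bam", "./adapters/bambara")
    manager.register("ful", "./adapters/fula")

    manager.activate("bam")  # first activation loads adapter weights from disk (slow path)
    manager.activate("ful")  # first activation of the second adapter
    manager.activate("bam")  # already loaded, so this is the fast hot swap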
src/engine/transcriber.py
ADDED
|
@@ -0,0 +1,132 @@
|
| 1 |
+
"""
|
| 2 |
+
Public inference interface.
|
| 3 |
+
Accepts audio as a file path or numpy array and returns transcribed text.
|
| 4 |
+
Handles chunking for audio longer than 30 seconds.
|
| 5 |
+
"""
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
import logging
|
| 9 |
+
import os
|
| 10 |
+
import time
|
| 11 |
+
from dataclasses import dataclass, field
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
from typing import TYPE_CHECKING
|
| 14 |
+
|
| 15 |
+
import numpy as np
|
| 16 |
+
import torch
|
| 17 |
+
|
| 18 |
+
if TYPE_CHECKING:
|
| 19 |
+
from src.engine.adapter_manager import AdapterManager
|
| 20 |
+
from src.engine.whisper_base import WhisperBackbone
|
| 21 |
+
|
| 22 |
+
logger = logging.getLogger(__name__)
|
| 23 |
+
|
| 24 |
+
TARGET_SR = 16_000
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@dataclass
|
| 28 |
+
class TranscriptionResult:
|
| 29 |
+
text: str
|
| 30 |
+
language: str
|
| 31 |
+
duration_s: float
|
| 32 |
+
processing_time_ms: int
|
| 33 |
+
confidence: float | None = None
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class Transcriber:
|
| 37 |
+
"""
|
| 38 |
+
Composes WhisperBackbone + AdapterManager to provide a simple transcription API.
|
| 39 |
+
Thread-safety: Not thread-safe by design — use one worker process.
|
| 40 |
+
"""
|
| 41 |
+
|
| 42 |
+
def __init__(self, backbone: "WhisperBackbone", adapter_manager: "AdapterManager") -> None:
|
| 43 |
+
self._backbone = backbone
|
| 44 |
+
self._adapter_manager = adapter_manager
|
| 45 |
+
|
| 46 |
+
def transcribe(
|
| 47 |
+
self,
|
| 48 |
+
audio: np.ndarray,
|
| 49 |
+
sample_rate: int,
|
| 50 |
+
language: str,
|
| 51 |
+
use_agri_prompt: bool = True,
|
| 52 |
+
) -> TranscriptionResult:
|
| 53 |
+
"""
|
| 54 |
+
Transcribe a float32 audio array.
|
| 55 |
+
For audio > 30s, splits the signal into 30-second chunks and concatenates the transcriptions.
|
| 56 |
+
"""
|
| 57 |
+
t0 = time.time()
|
| 58 |
+
|
| 59 |
+
# Activate the correct language adapter
|
| 60 |
+
self._adapter_manager.activate(language)
|
| 61 |
+
|
| 62 |
+
processor = self._backbone.processor
|
| 63 |
+
model = self._adapter_manager.get_model()
|
| 64 |
+
device = self._backbone.device
|
| 65 |
+
duration_s = len(audio) / sample_rate
|
| 66 |
+
|
| 67 |
+
if duration_s <= 30.0:
|
| 68 |
+
text = self._transcribe_chunk(audio, sample_rate, language, processor, model, device)
|
| 69 |
+
else:
|
| 70 |
+
text = self._transcribe_long(audio, sample_rate, language, processor, model, device)
|
| 71 |
+
|
| 72 |
+
elapsed_ms = int((time.time() - t0) * 1000)
|
| 73 |
+
return TranscriptionResult(
|
| 74 |
+
text=text.strip(),
|
| 75 |
+
language=language,
|
| 76 |
+
duration_s=duration_s,
|
| 77 |
+
processing_time_ms=elapsed_ms,
|
| 78 |
+
)
|
| 79 |
+
|
| 80 |
+
def transcribe_file(self, audio_path: str, language: str) -> TranscriptionResult:
|
| 81 |
+
"""Load audio from disk and transcribe."""
|
| 82 |
+
import librosa
|
| 83 |
+
audio, sr = librosa.load(audio_path, sr=TARGET_SR, mono=True)
|
| 84 |
+
return self.transcribe(audio, sr, language)
|
| 85 |
+
|
| 86 |
+
def _transcribe_chunk(
|
| 87 |
+
self,
|
| 88 |
+
audio: np.ndarray,
|
| 89 |
+
sr: int,
|
| 90 |
+
language: str,
|
| 91 |
+
processor,
|
| 92 |
+
model,
|
| 93 |
+
device: str,
|
| 94 |
+
) -> str:
|
| 95 |
+
"""Transcribe a single ≤30s chunk."""
|
| 96 |
+
inputs = processor.feature_extractor(
|
| 97 |
+
audio, sampling_rate=sr, return_tensors="pt"
|
| 98 |
+
)
|
| 99 |
+
input_features = inputs.input_features.to(device)
|
| 100 |
+
if device == "cuda":
|
| 101 |
+
input_features = input_features.half()
|
| 102 |
+
|
| 103 |
+
forced_decoder_ids = processor.get_decoder_prompt_ids(
|
| 104 |
+
language=language, task="transcribe"
|
| 105 |
+
)
|
| 106 |
+
|
| 107 |
+
with torch.no_grad():
|
| 108 |
+
predicted_ids = model.generate(
|
| 109 |
+
input_features,
|
| 110 |
+
forced_decoder_ids=forced_decoder_ids,
|
| 111 |
+
max_new_tokens=128,
|
| 112 |
+
)
|
| 113 |
+
|
| 114 |
+
return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
|
| 115 |
+
|
| 116 |
+
def _transcribe_long(
|
| 117 |
+
self,
|
| 118 |
+
audio: np.ndarray,
|
| 119 |
+
sr: int,
|
| 120 |
+
language: str,
|
| 121 |
+
processor,
|
| 122 |
+
model,
|
| 123 |
+
device: str,
|
| 124 |
+
) -> str:
|
| 125 |
+
"""Chunk audio into 30s segments and concatenate transcriptions."""
|
| 126 |
+
chunk_size = TARGET_SR * 30
|
| 127 |
+
chunks = [audio[i : i + chunk_size] for i in range(0, len(audio), chunk_size)]
|
| 128 |
+
parts = []
|
| 129 |
+
for chunk in chunks:
|
| 130 |
+
text = self._transcribe_chunk(chunk, sr, language, processor, model, device)
|
| 131 |
+
parts.append(text)
|
| 132 |
+
return " ".join(parts)
|
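A minimal end-to-end inference sketch composing the backbone, adapter manager, and transcriber; the audio file and adapter path are placeholders:

    from src.engine.adapter_manager import AdapterManager
    from src.engine.transcriber import Transcriber
    from src.engine.whisper_base import WhisperBackbone

    backbone = WhisperBackbone()
    backbone.load(device="cuda")

    manager = AdapterManager(backbone.model, config={})
    manager.register("bam", "./adapters/bambara")

    transcriber = Transcriber(backbone, manager)
    result = transcriber.transcribe_file("sample.wav", language="bam")
    print(result.text, result.duration_s, result.processing_time_ms)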
src/engine/whisper_base.py
ADDED
|
@@ -0,0 +1,77 @@
|
| 1 |
+
"""
|
| 2 |
+
Loads the Whisper backbone model and processor once.
|
| 3 |
+
All other modules receive references to this shared instance.
|
| 4 |
+
"""
|
| 5 |
+
from __future__ import annotations
|
| 6 |
+
|
| 7 |
+
import logging
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
import torch
|
| 11 |
+
import yaml
|
| 12 |
+
from transformers import WhisperForConditionalGeneration, WhisperProcessor
|
| 13 |
+
|
| 14 |
+
logger = logging.getLogger(__name__)
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class WhisperBackbone:
|
| 18 |
+
"""Singleton-style loader for the Whisper base model and processor."""
|
| 19 |
+
|
| 20 |
+
def __init__(self, config_path: str = "configs/base_config.yaml") -> None:
|
| 21 |
+
config_path = Path(config_path)
|
| 22 |
+
with open(config_path) as f:
|
| 23 |
+
cfg = yaml.safe_load(f)
|
| 24 |
+
self._model_id: str = cfg["model"]["id"]
|
| 25 |
+
self._model: WhisperForConditionalGeneration | None = None
|
| 26 |
+
self._processor: WhisperProcessor | None = None
|
| 27 |
+
self._device: str = "cpu"
|
| 28 |
+
|
| 29 |
+
def load(self, device: str = "cuda", hf_token: str | None = None) -> None:
|
| 30 |
+
"""Load model and processor into memory. Call once at startup."""
|
| 31 |
+
self._device = device if torch.cuda.is_available() and device == "cuda" else "cpu"
|
| 32 |
+
logger.info("Loading %s on %s", self._model_id, self._device)
|
| 33 |
+
|
| 34 |
+
self._processor = WhisperProcessor.from_pretrained(
|
| 35 |
+
self._model_id,
|
| 36 |
+
token=hf_token,
|
| 37 |
+
)
|
| 38 |
+
|
| 39 |
+
dtype = torch.float16 if self._device == "cuda" else torch.float32
|
| 40 |
+
self._model = WhisperForConditionalGeneration.from_pretrained(
|
| 41 |
+
self._model_id,
|
| 42 |
+
torch_dtype=dtype,
|
| 43 |
+
token=hf_token,
|
| 44 |
+
).to(self._device)
|
| 45 |
+
|
| 46 |
+
self._model.eval()
|
| 47 |
+
logger.info("Model loaded successfully (dtype=%s, device=%s)", dtype, self._device)
|
| 48 |
+
|
| 49 |
+
@property
|
| 50 |
+
def model(self) -> WhisperForConditionalGeneration:
|
| 51 |
+
if self._model is None:
|
| 52 |
+
raise RuntimeError("Call WhisperBackbone.load() before accessing the model.")
|
| 53 |
+
return self._model
|
| 54 |
+
|
| 55 |
+
@property
|
| 56 |
+
def processor(self) -> WhisperProcessor:
|
| 57 |
+
if self._processor is None:
|
| 58 |
+
raise RuntimeError("Call WhisperBackbone.load() before accessing the processor.")
|
| 59 |
+
return self._processor
|
| 60 |
+
|
| 61 |
+
@property
|
| 62 |
+
def device(self) -> str:
|
| 63 |
+
return self._device
|
| 64 |
+
|
| 65 |
+
@property
|
| 66 |
+
def model_id(self) -> str:
|
| 67 |
+
return self._model_id
|
| 68 |
+
|
| 69 |
+
def free(self) -> None:
|
| 70 |
+
"""Release GPU memory."""
|
| 71 |
+
del self._model
|
| 72 |
+
del self._processor
|
| 73 |
+
self._model = None
|
| 74 |
+
self._processor = None
|
| 75 |
+
if torch.cuda.is_available():
|
| 76 |
+
torch.cuda.empty_cache()
|
| 77 |
+
logger.info("Backbone freed from memory.")
|
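Brief usage sketch for the backbone loader above; it is loaded once at startup and the same instance is shared with the other modules:

    from src.engine.whisper_base import WhisperBackbone

    backbone = WhisperBackbone("configs/base_config.yaml")
    backbone.load(device="cuda")  # falls back to CPU when CUDA is unavailable
    print(backbone.model_id, backbone.device)
    backbone.free()  # release GPU memory on shutdown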
src/iot/__init__.py
ADDED
|
File without changes
|
src/iot/intent_parser.py
ADDED
|
@@ -0,0 +1,75 @@
|
| 1 |
+
"""
|
| 2 |
+
Maps transcribed Bambara/Fula text to structured intents for IoT sensor queries.
|
| 3 |
+
Uses keyword matching (no ML required for v1).
|
| 4 |
+
Confidence = fraction of intent keywords present in the transcription.
|
| 5 |
+
"""
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
from dataclasses import dataclass, field
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
@dataclass
|
| 12 |
+
class Intent:
|
| 13 |
+
action: str # e.g., "check_soil", "check_weather"
|
| 14 |
+
entity: str # e.g., "soil", "weather"
|
| 15 |
+
parameters: dict = field(default_factory=dict)
|
| 16 |
+
confidence: float = 0.0
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
# Intent keyword taxonomy for Bambara (bam) and Fula (ful)
|
| 20 |
+
INTENT_KEYWORDS: dict[str, dict[str, list[str]]] = {
|
| 21 |
+
"check_soil": {
|
| 22 |
+
"bam": ["bunding", "nɔgɔ", "dugu", "foro", "sani"],
|
| 23 |
+
"ful": ["leydi", "ngesa", "ladde"],
|
| 24 |
+
},
|
| 25 |
+
"check_weather": {
|
| 26 |
+
"bam": ["teliman", "sanji", "dibi", "sira"],
|
| 27 |
+
"ful": ["yeeso", "fuɗorde"],
|
| 28 |
+
},
|
| 29 |
+
"irrigation_status": {
|
| 30 |
+
"bam": ["ji", "sanji", "foro"],
|
| 31 |
+
"ful": ["ndiyam", "ngesa"],
|
| 32 |
+
},
|
| 33 |
+
"pest_alert": {
|
| 34 |
+
"bam": ["kungoloni", "suruku"],
|
| 35 |
+
"ful": ["biñ-biñ"],
|
| 36 |
+
},
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
INTENT_ENTITIES = {
|
| 40 |
+
"check_soil": "soil",
|
| 41 |
+
"check_weather": "weather",
|
| 42 |
+
"irrigation_status": "irrigation",
|
| 43 |
+
"pest_alert": "pest",
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
class IntentParser:
|
| 48 |
+
"""Parses a transcription string into a structured Intent."""
|
| 49 |
+
|
| 50 |
+
def parse(self, text: str, language: str) -> Intent:
|
| 51 |
+
"""
|
| 52 |
+
Find the best matching intent by counting keyword overlaps.
|
| 53 |
+
Returns the highest-confidence intent.
|
| 54 |
+
"""
|
| 55 |
+
text_lower = text.lower()
|
| 56 |
+
best_action = "unknown"
|
| 57 |
+
best_confidence = 0.0
|
| 58 |
+
|
| 59 |
+
for action, lang_keywords in INTENT_KEYWORDS.items():
|
| 60 |
+
keywords = lang_keywords.get(language, [])
|
| 61 |
+
if not keywords:
|
| 62 |
+
continue
|
| 63 |
+
|
| 64 |
+
matches = sum(1 for kw in keywords if kw in text_lower)
|
| 65 |
+
confidence = matches / len(keywords)
|
| 66 |
+
|
| 67 |
+
if confidence > best_confidence:
|
| 68 |
+
best_confidence = confidence
|
| 69 |
+
best_action = action
|
| 70 |
+
|
| 71 |
+
return Intent(
|
| 72 |
+
action=best_action,
|
| 73 |
+
entity=INTENT_ENTITIES.get(best_action, "unknown"),
|
| 74 |
+
confidence=round(best_confidence, 3),
|
| 75 |
+
)
|
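A small sketch of the keyword matching above; the input phrase is invented for illustration and only needs to contain keywords from the taxonomy:

    from src.iot.intent_parser import IntentParser

    parser = IntentParser()
    intent = parser.parse("foro ji", language="bam")  # contains irrigation keywords
    print(intent.action, intent.entity, intent.confidence)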
src/iot/sensor_bridge.py
ADDED
|
@@ -0,0 +1,121 @@
|
| 1 |
+
"""
|
| 2 |
+
Fetches sensor data (soil moisture, weather, irrigation) from the IoT backend API.
|
| 3 |
+
Falls back to synthetic mock data when SENSOR_API_URL is not configured.
|
| 4 |
+
"""
|
| 5 |
+
from __future__ import annotations
|
| 6 |
+
|
| 7 |
+
import logging
|
| 8 |
+
import random
|
| 9 |
+
from dataclasses import dataclass, field
|
| 10 |
+
from datetime import datetime
|
| 11 |
+
from typing import TYPE_CHECKING
|
| 12 |
+
|
| 13 |
+
if TYPE_CHECKING:
|
| 14 |
+
from src.iot.intent_parser import Intent
|
| 15 |
+
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
@dataclass
|
| 20 |
+
class SensorData:
|
| 21 |
+
sensor_type: str
|
| 22 |
+
values: dict[str, float]
|
| 23 |
+
timestamp: str
|
| 24 |
+
unit: str = ""
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class SensorBridge:
|
| 28 |
+
"""Async bridge to IoT sensor API. Uses mock data when no API URL is configured."""
|
| 29 |
+
|
| 30 |
+
def __init__(self, sensor_api_url: str | None = None, timeout_s: float = 5.0) -> None:
|
| 31 |
+
self.sensor_api_url = sensor_api_url
|
| 32 |
+
self.timeout_s = timeout_s
|
| 33 |
+
self._mock_mode = not sensor_api_url
|
| 34 |
+
|
| 35 |
+
if self._mock_mode:
|
| 36 |
+
logger.info("SensorBridge: running in MOCK mode (set SENSOR_API_URL to use real sensors).")
|
| 37 |
+
|
| 38 |
+
async def fetch(self, intent: "Intent", field_id: str | None = None) -> SensorData:
|
| 39 |
+
"""Dispatch to the correct sensor fetch method based on intent entity."""
|
| 40 |
+
action = intent.action
|
| 41 |
+
if action == "check_soil":
|
| 42 |
+
return await self.get_soil_data(field_id or "default")
|
| 43 |
+
elif action == "check_weather":
|
| 44 |
+
return await self.get_weather(field_id or "default")
|
| 45 |
+
elif action == "irrigation_status":
|
| 46 |
+
return await self.get_irrigation(field_id or "default")
|
| 47 |
+
elif action == "pest_alert":
|
| 48 |
+
return await self.get_pest_status(field_id or "default")
|
| 49 |
+
else:
|
| 50 |
+
return SensorData(
|
| 51 |
+
sensor_type="unknown",
|
| 52 |
+
values={},
|
| 53 |
+
timestamp=datetime.utcnow().isoformat(),
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
async def get_soil_data(self, location_id: str) -> SensorData:
|
| 57 |
+
if self._mock_mode:
|
| 58 |
+
return SensorData(
|
| 59 |
+
sensor_type="soil",
|
| 60 |
+
values={
|
| 61 |
+
"moisture_pct": round(random.uniform(25, 65), 1),
|
| 62 |
+
"ph": round(random.uniform(5.5, 7.5), 1),
|
| 63 |
+
"nitrogen_ppm": round(random.uniform(10, 40), 1),
|
| 64 |
+
"temperature_c": round(random.uniform(24, 35), 1),
|
| 65 |
+
},
|
| 66 |
+
timestamp=datetime.utcnow().isoformat(),
|
| 67 |
+
)
|
| 68 |
+
return await self._get(f"/sensors/soil/{location_id}", "soil")
|
| 69 |
+
|
| 70 |
+
async def get_weather(self, location_id: str) -> SensorData:
|
| 71 |
+
if self._mock_mode:
|
| 72 |
+
return SensorData(
|
| 73 |
+
sensor_type="weather",
|
| 74 |
+
values={
|
| 75 |
+
"temperature_c": round(random.uniform(28, 42), 1),
|
| 76 |
+
"humidity_pct": round(random.uniform(20, 80), 1),
|
| 77 |
+
"wind_speed_kmh": round(random.uniform(0, 25), 1),
|
| 78 |
+
"rain_probability_pct": round(random.uniform(0, 100), 1),
|
| 79 |
+
},
|
| 80 |
+
timestamp=datetime.utcnow().isoformat(),
|
| 81 |
+
)
|
| 82 |
+
return await self._get(f"/sensors/weather/{location_id}", "weather")
|
| 83 |
+
|
| 84 |
+
async def get_irrigation(self, field_id: str) -> SensorData:
|
| 85 |
+
if self._mock_mode:
|
| 86 |
+
return SensorData(
|
| 87 |
+
sensor_type="irrigation",
|
| 88 |
+
values={
|
| 89 |
+
"flow_rate_lph": round(random.uniform(0, 500), 1),
|
| 90 |
+
"pressure_bar": round(random.uniform(1.0, 4.0), 2),
|
| 91 |
+
"active": float(random.choice([0, 1])),
|
| 92 |
+
"last_irrigation_h_ago": round(random.uniform(1, 48), 1),
|
| 93 |
+
},
|
| 94 |
+
timestamp=datetime.utcnow().isoformat(),
|
| 95 |
+
)
|
| 96 |
+
return await self._get(f"/sensors/irrigation/{field_id}", "irrigation")
|
| 97 |
+
|
| 98 |
+
async def get_pest_status(self, field_id: str) -> SensorData:
|
| 99 |
+
if self._mock_mode:
|
| 100 |
+
return SensorData(
|
| 101 |
+
sensor_type="pest",
|
| 102 |
+
values={
|
| 103 |
+
"trap_count_24h": float(random.randint(0, 50)),
|
| 104 |
+
"alert_level": float(random.randint(0, 3)), # 0=none 1=low 2=medium 3=high
|
| 105 |
+
},
|
| 106 |
+
timestamp=datetime.utcnow().isoformat(),
|
| 107 |
+
)
|
| 108 |
+
return await self._get(f"/sensors/pest/{field_id}", "pest")
|
| 109 |
+
|
| 110 |
+
async def _get(self, path: str, sensor_type: str) -> SensorData:
|
| 111 |
+
import httpx
|
| 112 |
+
url = f"{self.sensor_api_url}{path}"
|
| 113 |
+
async with httpx.AsyncClient(timeout=self.timeout_s) as client:
|
| 114 |
+
response = await client.get(url)
|
| 115 |
+
response.raise_for_status()
|
| 116 |
+
data = response.json()
|
| 117 |
+
return SensorData(
|
| 118 |
+
sensor_type=sensor_type,
|
| 119 |
+
values=data.get("values", data),
|
| 120 |
+
timestamp=data.get("timestamp", datetime.utcnow().isoformat()),
|
| 121 |
+
)
|
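A hedged async usage sketch in mock mode (no SENSOR_API_URL configured); the field id is illustrative:

    import asyncio

    from src.iot.intent_parser import Intent
    from src.iot.sensor_bridge import SensorBridge

    async def main() -> None:
        bridge = SensorBridge(sensor_api_url=None)  # None -> synthetic mock readings
        intent = Intent(action="check_soil", entity="soil")
        data = await bridge.fetch(intent, field_id="field-01")
        print(data.sensor_type, data.values)

    asyncio.run(main())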
src/iot/voice_responder.py
ADDED
|
@@ -0,0 +1,260 @@
|
| 1 |
+
"""
|
| 2 |
+
Generates voice response text from sensor data in the farmer's own language.
|
| 3 |
+
Supports Bambara (bam), Fula (ful), French (fr), and English (en).
|
| 4 |
+
Bambara/Fula templates use short sentences (≤15 words) for best MMS-TTS quality.
|
| 5 |
+
"""
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
from typing import TYPE_CHECKING
|
| 9 |
+
|
| 10 |
+
if TYPE_CHECKING:
|
| 11 |
+
from src.iot.intent_parser import Intent
|
| 12 |
+
from src.iot.sensor_bridge import SensorData
|
| 13 |
+
|
| 14 |
+
# Alert thresholds
|
| 15 |
+
SOIL_MOISTURE_LOW = 30.0 # Below this → immediate irrigation recommended
|
| 16 |
+
SOIL_MOISTURE_HIGH = 70.0 # Above this → drainage warning
|
| 17 |
+
SOIL_PH_LOW = 5.5
|
| 18 |
+
SOIL_PH_HIGH = 7.5
|
| 19 |
+
TEMP_HIGH = 38.0
|
| 20 |
+
PEST_ALERT_HIGH = 2 # Alert level ≥ 2 → warning
|
| 21 |
+
|
| 22 |
+
# ── Bambara templates (≤6 words per sentence for clear MMS-TTS output) ───────
|
| 23 |
+
BAMBARA_TEMPLATES = {
|
| 24 |
+
"soil_moisture_low": "Bunding ji dɔgɔ. I ka foro ji.",
|
| 25 |
+
"soil_moisture_high": "Ji ca kojugu. Foro ma fɛ.",
|
| 26 |
+
"soil_ph_low": "Bunding kɔnɔ jugu. Kalisi fara a kan.",
|
| 27 |
+
"soil_ph_high": "Bunding kɔnɔ tɛmɛ. Soufre fara a kan.",
|
| 28 |
+
"weather_hot": "Teliman gbɛlɛ. Tile ma sigi.",
|
| 29 |
+
"rain_likely": "Sanji bɛ na. Sɔrɔ jɔ.",
|
| 30 |
+
"pest_high": "Dɔgɔw bɛ foro kɔnɔ. Bɔ u.",
|
| 31 |
+
"irrigation_needed": "Foro fɛ ji. Ji sira yɔrɔ.",
|
| 32 |
+
"irrigation_active": "Ji bɛ taa. A bɛ kɛ cogo di.",
|
| 33 |
+
"default": "Kabako jumanw sɔrɔla.",
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
# ── Fula templates (≤6 words per sentence for clear MMS-TTS output) ──────────
|
| 37 |
+
FULA_TEMPLATES = {
|
| 38 |
+
"soil_moisture_low": "Leydi ndiyam famɗi. Wado ngesa.",
|
| 39 |
+
"soil_moisture_high": "Ndiyam heewi. Leydi famɗaali.",
|
| 40 |
+
"soil_ph_low": "Leydi suurii. Waɗ kalisi.",
|
| 41 |
+
"soil_ph_high": "Leydi alkalii. Waɗ soufre.",
|
| 42 |
+
"weather_hot": "Nguleeki heewi. Muusal.",
|
| 43 |
+
"rain_likely": "Ndiyam wadata. Loosu ngesa.",
|
| 44 |
+
"pest_high": "Biñ-biñ ngesa nder. Fiil ɗen.",
|
| 45 |
+
"irrigation_needed": "Ngesa fɛɗɛli ndiyam. Wado.",
|
| 46 |
+
"irrigation_active": "Ndiyam wona jooni.",
|
| 47 |
+
"default": "Humpito juuti waɗaama.",
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class VoiceResponder:
|
| 52 |
+
"""Converts sensor readings into actionable voice messages in the farmer's language."""
|
| 53 |
+
|
| 54 |
+
def __init__(self, language: str = "fr") -> None:
|
| 55 |
+
self.language = language
|
| 56 |
+
|
| 57 |
+
def generate_response(self, intent: "Intent", sensor_data: "SensorData") -> str:
|
| 58 |
+
if self.language == "bam":
|
| 59 |
+
return self._bambara_response(sensor_data)
|
| 60 |
+
elif self.language == "ful":
|
| 61 |
+
return self._fula_response(sensor_data)
|
| 62 |
+
else:
|
| 63 |
+
return self._french_response(sensor_data)
|
| 64 |
+
|
| 65 |
+
# ── Bambara ──────────────────────────────────────────────────────────────
|
| 66 |
+
|
| 67 |
+
def _bambara_response(self, sensor_data: "SensorData") -> str:
|
| 68 |
+
t = sensor_data.sensor_type
|
| 69 |
+
v = sensor_data.values
|
| 70 |
+
T = BAMBARA_TEMPLATES
|
| 71 |
+
|
| 72 |
+
if t == "soil":
|
| 73 |
+
moisture = v.get("moisture_pct")
|
| 74 |
+
if moisture is not None:
|
| 75 |
+
if moisture < SOIL_MOISTURE_LOW:
|
| 76 |
+
return T["soil_moisture_low"]
|
| 77 |
+
elif moisture > SOIL_MOISTURE_HIGH:
|
| 78 |
+
return T["soil_moisture_high"]
|
| 79 |
+
ph = v.get("ph")
|
| 80 |
+
if ph is not None:
|
| 81 |
+
if ph < SOIL_PH_LOW:
|
| 82 |
+
return T["soil_ph_low"]
|
| 83 |
+
elif ph > SOIL_PH_HIGH:
|
| 84 |
+
return T["soil_ph_high"]
|
| 85 |
+
|
| 86 |
+
elif t == "weather":
|
| 87 |
+
temp = v.get("temperature_c")
|
| 88 |
+
rain = v.get("rain_probability_pct")
|
| 89 |
+
if temp is not None and temp > TEMP_HIGH:
|
| 90 |
+
return T["weather_hot"]
|
| 91 |
+
if rain is not None and rain > 70:
|
| 92 |
+
return T["rain_likely"]
|
| 93 |
+
|
| 94 |
+
elif t == "irrigation":
|
| 95 |
+
last = v.get("last_irrigation_h_ago")
|
| 96 |
+
active = v.get("active")
|
| 97 |
+
if active:
|
| 98 |
+
return T["irrigation_active"]
|
| 99 |
+
if last is not None and last > 24:
|
| 100 |
+
return T["irrigation_needed"]
|
| 101 |
+
|
| 102 |
+
elif t == "pest":
|
| 103 |
+
level = int(v.get("alert_level", 0))
|
| 104 |
+
if level >= PEST_ALERT_HIGH:
|
| 105 |
+
return T["pest_high"]
|
| 106 |
+
|
| 107 |
+
return T["default"]
|
| 108 |
+
|
| 109 |
+
# ── Fula ─────────────────────────────────────────────────────────────────
|
| 110 |
+
|
| 111 |
+
def _fula_response(self, sensor_data: "SensorData") -> str:
|
| 112 |
+
t = sensor_data.sensor_type
|
| 113 |
+
v = sensor_data.values
|
| 114 |
+
T = FULA_TEMPLATES
|
| 115 |
+
|
| 116 |
+
if t == "soil":
|
| 117 |
+
moisture = v.get("moisture_pct")
|
| 118 |
+
if moisture is not None:
|
| 119 |
+
if moisture < SOIL_MOISTURE_LOW:
|
| 120 |
+
return T["soil_moisture_low"]
|
| 121 |
+
elif moisture > SOIL_MOISTURE_HIGH:
|
| 122 |
+
return T["soil_moisture_high"]
|
| 123 |
+
ph = v.get("ph")
|
| 124 |
+
if ph is not None:
|
| 125 |
+
if ph < SOIL_PH_LOW:
|
| 126 |
+
return T["soil_ph_low"]
|
| 127 |
+
elif ph > SOIL_PH_HIGH:
|
| 128 |
+
return T["soil_ph_high"]
|
| 129 |
+
|
| 130 |
+
elif t == "weather":
|
| 131 |
+
temp = v.get("temperature_c")
|
| 132 |
+
rain = v.get("rain_probability_pct")
|
| 133 |
+
if temp is not None and temp > TEMP_HIGH:
|
| 134 |
+
return T["weather_hot"]
|
| 135 |
+
if rain is not None and rain > 70:
|
| 136 |
+
return T["rain_likely"]
|
| 137 |
+
|
| 138 |
+
elif t == "irrigation":
|
| 139 |
+
active = v.get("active")
|
| 140 |
+
last = v.get("last_irrigation_h_ago")
|
| 141 |
+
if active:
|
| 142 |
+
return T["irrigation_active"]
|
| 143 |
+
if last is not None and last > 24:
|
| 144 |
+
return T["irrigation_needed"]
|
| 145 |
+
|
| 146 |
+
elif t == "pest":
|
| 147 |
+
level = int(v.get("alert_level", 0))
|
| 148 |
+
if level >= PEST_ALERT_HIGH:
|
| 149 |
+
return T["pest_high"]
|
| 150 |
+
|
| 151 |
+
return T["default"]
|
| 152 |
+
|
| 153 |
+
# ── French (original) ─────────────────────────────────────────────────────
|
| 154 |
+
|
| 155 |
+
def _french_response(self, sensor_data: "SensorData") -> str:
|
| 156 |
+
t = sensor_data.sensor_type
|
| 157 |
+
v = sensor_data.values
|
| 158 |
+
if t == "soil":
|
| 159 |
+
return self._soil_response(v)
|
| 160 |
+
elif t == "weather":
|
| 161 |
+
return self._weather_response(v)
|
| 162 |
+
elif t == "irrigation":
|
| 163 |
+
return self._irrigation_response(v)
|
| 164 |
+
elif t == "pest":
|
| 165 |
+
return self._pest_response(v)
|
| 166 |
+
else:
|
| 167 |
+
return "Données du capteur non disponibles pour le moment."
|
| 168 |
+
|
| 169 |
+
def _soil_response(self, v: dict) -> str:
|
| 170 |
+
parts = []
|
| 171 |
+
moisture = v.get("moisture_pct")
|
| 172 |
+
ph = v.get("ph")
|
| 173 |
+
temp = v.get("temperature_c")
|
| 174 |
+
nitrogen = v.get("nitrogen_ppm")
|
| 175 |
+
|
| 176 |
+
if moisture is not None:
|
| 177 |
+
parts.append(f"Humidité du sol : {moisture:.0f}%.")
|
| 178 |
+
if moisture < SOIL_MOISTURE_LOW:
|
| 179 |
+
parts.append("Irrigation recommandée immédiatement.")
|
| 180 |
+
elif moisture > SOIL_MOISTURE_HIGH:
|
| 181 |
+
parts.append("Sol trop humide, risque d'engorgement.")
|
| 182 |
+
|
| 183 |
+
if ph is not None:
|
| 184 |
+
parts.append(f"pH du sol : {ph:.1f}.")
|
| 185 |
+
if ph < SOIL_PH_LOW:
|
| 186 |
+
parts.append("Sol trop acide — envisagez un amendement calcaire.")
|
| 187 |
+
elif ph > SOIL_PH_HIGH:
|
| 188 |
+
parts.append("Sol trop alcalin — un apport de soufre peut aider.")
|
| 189 |
+
|
| 190 |
+
if temp is not None:
|
| 191 |
+
parts.append(f"Température du sol : {temp:.0f}°C.")
|
| 192 |
+
|
| 193 |
+
if nitrogen is not None:
|
| 194 |
+
parts.append(f"Azote disponible : {nitrogen:.0f} ppm.")
|
| 195 |
+
if nitrogen < 15:
|
| 196 |
+
parts.append("Niveau d'azote faible — envisagez un engrais azoté.")
|
| 197 |
+
|
| 198 |
+
return " ".join(parts) if parts else "Données du sol reçues."
|
| 199 |
+
|
| 200 |
+
def _weather_response(self, v: dict) -> str:
|
| 201 |
+
parts = []
|
| 202 |
+
temp = v.get("temperature_c")
|
| 203 |
+
humidity = v.get("humidity_pct")
|
| 204 |
+
wind = v.get("wind_speed_kmh")
|
| 205 |
+
rain = v.get("rain_probability_pct")
|
| 206 |
+
|
| 207 |
+
if temp is not None:
|
| 208 |
+
parts.append(f"Température : {temp:.0f}°C.")
|
| 209 |
+
if temp > TEMP_HIGH:
|
| 210 |
+
parts.append("Chaleur excessive — évitez les travaux aux heures les plus chaudes.")
|
| 211 |
+
|
| 212 |
+
if humidity is not None:
|
| 213 |
+
parts.append(f"Humidité de l'air : {humidity:.0f}%.")
|
| 214 |
+
|
| 215 |
+
if wind is not None:
|
| 216 |
+
parts.append(f"Vent : {wind:.0f} km/h.")
|
| 217 |
+
|
| 218 |
+
if rain is not None:
|
| 219 |
+
parts.append(f"Probabilité de pluie : {rain:.0f}%.")
|
| 220 |
+
if rain > 70:
|
| 221 |
+
parts.append("Pluie probable — reportez les traitements pesticides.")
|
| 222 |
+
|
| 223 |
+
return " ".join(parts) if parts else "Données météo reçues."
|
| 224 |
+
|
| 225 |
+
def _irrigation_response(self, v: dict) -> str:
|
| 226 |
+
parts = []
|
| 227 |
+
active = v.get("active")
|
| 228 |
+
last = v.get("last_irrigation_h_ago")
|
| 229 |
+
flow = v.get("flow_rate_lph")
|
| 230 |
+
|
| 231 |
+
if active is not None:
|
| 232 |
+
state = "en marche" if active else "arrêtée"
|
| 233 |
+
parts.append(f"Irrigation {state}.")
|
| 234 |
+
|
| 235 |
+
if flow is not None and active:
|
| 236 |
+
parts.append(f"Débit : {flow:.0f} litres par heure.")
|
| 237 |
+
|
| 238 |
+
if last is not None:
|
| 239 |
+
parts.append(f"Dernière irrigation il y a {last:.0f} heures.")
|
| 240 |
+
if last > 24:
|
| 241 |
+
parts.append("Plus de 24 heures sans irrigation — vérifiez les besoins en eau.")
|
| 242 |
+
|
| 243 |
+
return " ".join(parts) if parts else "Statut d'irrigation reçu."
|
| 244 |
+
|
| 245 |
+
def _pest_response(self, v: dict) -> str:
|
| 246 |
+
level = int(v.get("alert_level", 0))
|
| 247 |
+
count = v.get("trap_count_24h")
|
| 248 |
+
|
| 249 |
+
level_labels = {0: "aucune", 1: "faible", 2: "modérée", 3: "élevée"}
|
| 250 |
+
label = level_labels.get(level, "inconnue")
|
| 251 |
+
|
| 252 |
+
parts = [f"Présence d'insectes nuisibles : niveau {label}."]
|
| 253 |
+
|
| 254 |
+
if count is not None:
|
| 255 |
+
parts.append(f"{count:.0f} insectes capturés en 24 heures.")
|
| 256 |
+
|
| 257 |
+
if level >= PEST_ALERT_HIGH:
|
| 258 |
+
parts.append("Traitement recommandé — consultez un agent agricole.")
|
| 259 |
+
|
| 260 |
+
return " ".join(parts)
|
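Sketch of the full voice loop these three IoT modules form (parse → fetch → respond), using mock sensor data; the Fula phrase is illustrative:

    import asyncio

    from src.iot.intent_parser import IntentParser
    from src.iot.sensor_bridge import SensorBridge
    from src.iot.voice_responder import VoiceResponder

    async def main() -> None:
        intent = IntentParser().parse("leydi ndiyam", language="ful")
        data = await SensorBridge().fetch(intent)
        print(VoiceResponder(language="ful").generate_response(intent, data))

    asyncio.run(main())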
src/optimization/__init__.py
ADDED
|
File without changes
|
src/optimization/onnx_exporter.py
ADDED
|
@@ -0,0 +1,106 @@
|
| 1 |
+
"""
|
| 2 |
+
Merges LoRA adapter weights into the backbone and exports to ONNX.
|
| 3 |
+
Produces one ONNX file per language (ONNX cannot hot-swap adapters at runtime).
|
| 4 |
+
|
| 5 |
+
Requires: optimum[onnxruntime]
|
| 6 |
+
"""
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
import logging
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from typing import TYPE_CHECKING
|
| 12 |
+
|
| 13 |
+
if TYPE_CHECKING:
|
| 14 |
+
from peft import PeftModel
|
| 15 |
+
from transformers import WhisperProcessor
|
| 16 |
+
|
| 17 |
+
logger = logging.getLogger(__name__)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class ONNXExporter:
|
| 21 |
+
"""Merges a LoRA PeftModel into its base model and exports to ONNX."""
|
| 22 |
+
|
| 23 |
+
def merge_and_export(
|
| 24 |
+
self,
|
| 25 |
+
peft_model: "PeftModel",
|
| 26 |
+
processor: "WhisperProcessor",
|
| 27 |
+
output_dir: str,
|
| 28 |
+
language: str,
|
| 29 |
+
) -> Path:
|
| 30 |
+
"""
|
| 31 |
+
1. Merge LoRA weights into base model (merge_and_unload)
|
| 32 |
+
2. Export merged model to ONNX via optimum
|
| 33 |
+
Returns the output directory path.
|
| 34 |
+
"""
|
| 35 |
+
output_path = Path(output_dir) / language
|
| 36 |
+
output_path.mkdir(parents=True, exist_ok=True)
|
| 37 |
+
|
| 38 |
+
logger.info("Merging LoRA adapter '%s' into base model...", language)
|
| 39 |
+
merged_model = peft_model.merge_and_unload()
|
| 40 |
+
merged_model.eval()
|
| 41 |
+
|
| 42 |
+
logger.info("Exporting to ONNX: %s", output_path)
|
| 43 |
+
self._export_with_optimum(merged_model, processor, str(output_path))
|
| 44 |
+
|
| 45 |
+
return output_path
|
| 46 |
+
|
| 47 |
+
def _export_with_optimum(
|
| 48 |
+
self,
|
| 49 |
+
merged_model,
|
| 50 |
+
processor: "WhisperProcessor",
|
| 51 |
+
output_dir: str,
|
| 52 |
+
) -> None:
|
| 53 |
+
"""Use optimum's ONNX export pipeline."""
|
| 54 |
+
from optimum.exporters.onnx import main_export
|
| 55 |
+
|
| 56 |
+
# Save merged model to a temp directory first
|
| 57 |
+
import tempfile
|
| 58 |
+
|
| 59 |
+
with tempfile.TemporaryDirectory() as tmp_dir:
|
| 60 |
+
logger.info("Saving merged model to temp dir for export...")
|
| 61 |
+
merged_model.save_pretrained(tmp_dir)
|
| 62 |
+
processor.save_pretrained(tmp_dir)
|
| 63 |
+
|
| 64 |
+
logger.info("Running optimum ONNX export...")
|
| 65 |
+
main_export(
|
| 66 |
+
model_name_or_path=tmp_dir,
|
| 67 |
+
output=output_dir,
|
| 68 |
+
task="automatic-speech-recognition",
|
| 69 |
+
opset=17,
|
| 70 |
+
optimize="O2",
|
| 71 |
+
)
|
| 72 |
+
|
| 73 |
+
logger.info("ONNX export complete: %s", output_dir)
|
| 74 |
+
|
| 75 |
+
def validate(
|
| 76 |
+
self,
|
| 77 |
+
onnx_dir: str,
|
| 78 |
+
processor: "WhisperProcessor",
|
| 79 |
+
test_audio_arrays: list,
|
| 80 |
+
sample_rate: int = 16_000,
|
| 81 |
+
reference_texts: list[str] | None = None,
|
| 82 |
+
) -> dict:
|
| 83 |
+
"""
|
| 84 |
+
Run inference with the exported ONNX model and compute WER vs. references.
|
| 85 |
+
"""
|
| 86 |
+
import numpy as np
|
| 87 |
+
from optimum.onnxruntime import ORTModelForSpeechSeq2Seq
|
| 88 |
+
|
| 89 |
+
logger.info("Validating ONNX model at %s...", onnx_dir)
|
| 90 |
+
ort_model = ORTModelForSpeechSeq2Seq.from_pretrained(onnx_dir)
|
| 91 |
+
|
| 92 |
+
transcriptions = []
|
| 93 |
+
for audio in test_audio_arrays:
|
| 94 |
+
inputs = processor(audio, sampling_rate=sample_rate, return_tensors="pt")
|
| 95 |
+
outputs = ort_model.generate(inputs.input_features)
|
| 96 |
+
text = processor.batch_decode(outputs, skip_special_tokens=True)[0]
|
| 97 |
+
transcriptions.append(text)
|
| 98 |
+
|
| 99 |
+
result = {"transcriptions": transcriptions}
|
| 100 |
+
|
| 101 |
+
if reference_texts:
|
| 102 |
+
import jiwer
|
| 103 |
+
wer = jiwer.wer(reference_texts, transcriptions)
|
| 104 |
+
result["wer"] = round(wer, 4)
|
| 105 |
+
|
| 106 |
+
return result
|
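A hedged export sketch; it assumes a trained Bambara adapter already exists at the path shown and loads it directly with PEFT:

    from peft import PeftModel
    from transformers import WhisperForConditionalGeneration, WhisperProcessor
    from src.optimization.onnx_exporter import ONNXExporter

    model_id = "openai/whisper-large-v3-turbo"
    base = WhisperForConditionalGeneration.from_pretrained(model_id)
    processor = WhisperProcessor.from_pretrained(model_id)
    peft_model = PeftModel.from_pretrained(base, "./adapters/bambara")

    exporter = ONNXExporter()
    onnx_dir = exporter.merge_and_export(peft_model, processor, output_dir="./onnx", language="bam")
    print("ONNX model written to", onnx_dir)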
src/optimization/quantizer.py
ADDED
|
@@ -0,0 +1,95 @@
|
| 1 |
+
"""
|
| 2 |
+
BitsAndBytes quantization for GPU-constrained deployment.
|
| 3 |
+
4-bit NF4: reduces Whisper-large-v3-turbo from ~3GB to ~1GB VRAM.
|
| 4 |
+
8-bit: intermediate option with less accuracy loss.
|
| 5 |
+
"""
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
import logging
|
| 9 |
+
import time
|
| 10 |
+
from typing import TYPE_CHECKING
|
| 11 |
+
|
| 12 |
+
import torch
|
| 13 |
+
from transformers import BitsAndBytesConfig, WhisperForConditionalGeneration, WhisperProcessor
|
| 14 |
+
|
| 15 |
+
if TYPE_CHECKING:
|
| 16 |
+
pass
|
| 17 |
+
|
| 18 |
+
logger = logging.getLogger(__name__)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def load_4bit(model_id: str, hf_token: str | None = None) -> WhisperForConditionalGeneration:
|
| 22 |
+
"""Load Whisper with 4-bit NF4 quantization. Reduces VRAM to ~1GB."""
|
| 23 |
+
bnb_config = BitsAndBytesConfig(
|
| 24 |
+
load_in_4bit=True,
|
| 25 |
+
bnb_4bit_quant_type="nf4",
|
| 26 |
+
bnb_4bit_compute_dtype=torch.float16,
|
| 27 |
+
bnb_4bit_use_double_quant=True,
|
| 28 |
+
)
|
| 29 |
+
logger.info("Loading %s with 4-bit NF4 quantization...", model_id)
|
| 30 |
+
model = WhisperForConditionalGeneration.from_pretrained(
|
| 31 |
+
model_id,
|
| 32 |
+
quantization_config=bnb_config,
|
| 33 |
+
device_map="auto",
|
| 34 |
+
token=hf_token,
|
| 35 |
+
)
|
| 36 |
+
return model
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def load_8bit(model_id: str, hf_token: str | None = None) -> WhisperForConditionalGeneration:
|
| 40 |
+
"""Load Whisper with 8-bit quantization. Reduces VRAM to ~1.5GB."""
|
| 41 |
+
bnb_config = BitsAndBytesConfig(load_in_8bit=True)
|
| 42 |
+
logger.info("Loading %s with 8-bit quantization...", model_id)
|
| 43 |
+
model = WhisperForConditionalGeneration.from_pretrained(
|
| 44 |
+
model_id,
|
| 45 |
+
quantization_config=bnb_config,
|
| 46 |
+
device_map="auto",
|
| 47 |
+
token=hf_token,
|
| 48 |
+
)
|
| 49 |
+
return model
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
class ModelQuantizer:
|
| 53 |
+
"""Benchmarks quantized vs full-precision models."""
|
| 54 |
+
|
| 55 |
+
def __init__(self, model_id: str, hf_token: str | None = None) -> None:
|
| 56 |
+
self.model_id = model_id
|
| 57 |
+
self.hf_token = hf_token
|
| 58 |
+
|
| 59 |
+
def benchmark(
|
| 60 |
+
self,
|
| 61 |
+
model: WhisperForConditionalGeneration,
|
| 62 |
+
processor: WhisperProcessor,
|
| 63 |
+
test_audio_arrays: list,
|
| 64 |
+
sample_rate: int = 16_000,
|
| 65 |
+
) -> dict:
|
| 66 |
+
"""Measure latency and memory for a list of audio arrays."""
|
| 67 |
+
import numpy as np
|
| 68 |
+
|
| 69 |
+
device = next(model.parameters()).device
|
| 70 |
+
latencies = []
|
| 71 |
+
|
| 72 |
+
for audio in test_audio_arrays:
|
| 73 |
+
inputs = processor.feature_extractor(audio, sampling_rate=sample_rate, return_tensors="pt")
|
| 74 |
+
features = inputs.input_features.to(device)
|
| 75 |
+
|
| 76 |
+
if device.type == "cuda":
|
| 77 |
+
torch.cuda.synchronize()
|
| 78 |
+
t0 = time.perf_counter()
|
| 79 |
+
|
| 80 |
+
with torch.no_grad():
|
| 81 |
+
model.generate(features, max_new_tokens=50)
|
| 82 |
+
|
| 83 |
+
if device.type == "cuda":
|
| 84 |
+
torch.cuda.synchronize()
|
| 85 |
+
latencies.append((time.perf_counter() - t0) * 1000)
|
| 86 |
+
|
| 87 |
+
result = {
|
| 88 |
+
"mean_latency_ms": round(sum(latencies) / len(latencies), 1),
|
| 89 |
+
"max_latency_ms": round(max(latencies), 1),
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
if torch.cuda.is_available():
|
| 93 |
+
result["vram_allocated_gb"] = round(torch.cuda.memory_allocated() / 1e9, 2)
|
| 94 |
+
|
| 95 |
+
return result
|
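A small benchmarking sketch for the 4-bit path above; the test clips are synthetic placeholders rather than real field recordings:

    import numpy as np
    from transformers import WhisperProcessor
    from src.optimization.quantizer import ModelQuantizer, load_4bit

    model_id = "openai/whisper-large-v3-turbo"
    model = load_4bit(model_id)
    processor = WhisperProcessor.from_pretrained(model_id)

    clips = [np.random.randn(16_000 * 10).astype(np.float32) for _ in range(3)]  # 3 x 10 s
    print(ModelQuantizer(model_id).benchmark(model, processor, clips))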
src/optimization/tflite_converter.py
ADDED
|
@@ -0,0 +1,76 @@
|
| 1 |
+
"""
|
| 2 |
+
Converts ONNX models to TFLite for offline edge deployment (Android phones in rural areas).
|
| 3 |
+
Note: Whisper's encoder and decoder are exported as separate TFLite models and
|
| 4 |
+
orchestrated together at inference time.
|
| 5 |
+
|
| 6 |
+
Requires: onnx-tf, tensorflow (install separately — large dependencies)
|
| 7 |
+
"""
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import logging
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
|
| 13 |
+
logger = logging.getLogger(__name__)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class TFLiteConverter:
|
| 17 |
+
"""Converts ONNX Whisper models to TFLite format for edge deployment."""
|
| 18 |
+
|
| 19 |
+
def convert(
|
| 20 |
+
self,
|
| 21 |
+
onnx_encoder_path: str,
|
| 22 |
+
onnx_decoder_path: str,
|
| 23 |
+
output_dir: str,
|
| 24 |
+
quantize: bool = True,
|
| 25 |
+
) -> dict[str, Path]:
|
| 26 |
+
"""
|
| 27 |
+
Convert encoder and decoder ONNX models to TFLite.
|
| 28 |
+
Returns paths to the generated .tflite files.
|
| 29 |
+
"""
|
| 30 |
+
output_path = Path(output_dir)
|
| 31 |
+
output_path.mkdir(parents=True, exist_ok=True)
|
| 32 |
+
|
| 33 |
+
encoder_tflite = output_path / "encoder.tflite"
|
| 34 |
+
decoder_tflite = output_path / "decoder.tflite"
|
| 35 |
+
|
| 36 |
+
logger.info("Converting encoder ONNX → TFLite...")
|
| 37 |
+
self._onnx_to_tflite(onnx_encoder_path, str(encoder_tflite), quantize=quantize)
|
| 38 |
+
|
| 39 |
+
logger.info("Converting decoder ONNX → TFLite...")
|
| 40 |
+
self._onnx_to_tflite(onnx_decoder_path, str(decoder_tflite), quantize=quantize)
|
| 41 |
+
|
| 42 |
+
return {"encoder": encoder_tflite, "decoder": decoder_tflite}
|
| 43 |
+
|
| 44 |
+
def _onnx_to_tflite(self, onnx_path: str, output_path: str, quantize: bool) -> None:
|
| 45 |
+
"""Convert a single ONNX model to TFLite via onnx-tf + tensorflow."""
|
| 46 |
+
try:
|
| 47 |
+
import onnx
|
| 48 |
+
import onnx_tf
|
| 49 |
+
import tensorflow as tf
|
| 50 |
+
except ImportError as e:
|
| 51 |
+
raise ImportError(
|
| 52 |
+
"TFLite conversion requires onnx-tf and tensorflow. "
|
| 53 |
+
"Install with: pip install onnx-tf tensorflow"
|
| 54 |
+
) from e
|
| 55 |
+
|
| 56 |
+
import tempfile
|
| 57 |
+
|
| 58 |
+
# Step 1: ONNX → TensorFlow SavedModel
|
| 59 |
+
with tempfile.TemporaryDirectory() as tmp_dir:
|
| 60 |
+
onnx_model = onnx.load(onnx_path)
|
| 61 |
+
tf_rep = onnx_tf.backend.prepare(onnx_model)
|
| 62 |
+
tf_rep.export_graph(tmp_dir)
|
| 63 |
+
|
| 64 |
+
# Step 2: TF SavedModel → TFLite
|
| 65 |
+
converter = tf.lite.TFLiteConverter.from_saved_model(tmp_dir)
|
| 66 |
+
|
| 67 |
+
if quantize:
|
| 68 |
+
converter.optimizations = [tf.lite.Optimize.DEFAULT]
|
| 69 |
+
|
| 70 |
+
tflite_model = converter.convert()
|
| 71 |
+
|
| 72 |
+
with open(output_path, "wb") as f:
|
| 73 |
+
f.write(tflite_model)
|
| 74 |
+
|
| 75 |
+
size_mb = Path(output_path).stat().st_size / 1e6
|
| 76 |
+
logger.info("TFLite model saved: %s (%.1f MB)", output_path, size_mb)
|
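Usage sketch for the converter above; the encoder/decoder file names assume the layout produced by the optimum ONNX export and may need adjusting:

    from src.optimization.tflite_converter import TFLiteConverter

    converter = TFLiteConverter()
    paths = converter.convert(
        onnx_encoder_path="./onnx/bam/encoder_model.onnx",
        onnx_decoder_path="./onnx/bam/decoder_model.onnx",
        output_dir="./tflite/bam",
        quantize=True,
    )
    print(paths["encoder"], paths["decoder"])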
src/training/__init__.py
ADDED
File without changes

src/training/callbacks.py
ADDED
@@ -0,0 +1,83 @@
"""
Custom HuggingFace Trainer callbacks:
- EarlyStoppingOnWER: stops training when WER stops improving
- AdapterCheckpointCallback: saves only adapter weights (not full model) per checkpoint
"""
from __future__ import annotations

import logging
from pathlib import Path
from typing import TYPE_CHECKING

from transformers import TrainerCallback, TrainerControl, TrainerState, TrainingArguments

if TYPE_CHECKING:
    pass

logger = logging.getLogger(__name__)


class EarlyStoppingOnWER(TrainerCallback):
    """
    Stops training if eval WER does not improve by min_delta over `patience` evaluations.
    """

    def __init__(self, patience: int = 5, min_delta: float = 0.001) -> None:
        self.patience = patience
        self.min_delta = min_delta
        self._best_wer: float = float("inf")
        self._no_improve_count: int = 0

    def on_evaluate(
        self,
        args: TrainingArguments,
        state: TrainerState,
        control: TrainerControl,
        metrics: dict,
        **kwargs,
    ) -> None:
        wer = metrics.get("eval_wer")
        if wer is None:
            return

        if wer < self._best_wer - self.min_delta:
            self._best_wer = wer
            self._no_improve_count = 0
            logger.info("WER improved to %.4f", wer)
        else:
            self._no_improve_count += 1
            logger.info(
                "WER %.4f did not improve (best: %.4f). No-improve count: %d/%d",
                wer, self._best_wer, self._no_improve_count, self.patience,
            )
            if self._no_improve_count >= self.patience:
                logger.warning("Early stopping triggered after %d evaluations without improvement.", self.patience)
                control.should_training_stop = True


class AdapterCheckpointCallback(TrainerCallback):
    """
    Saves only the LoRA adapter weights on each checkpoint event.
    Adapter weights are ~50MB vs ~3GB for the full model.
    """

    def __init__(self, adapter_output_dir: str) -> None:
        self.adapter_output_dir = Path(adapter_output_dir)

    def on_save(
        self,
        args: TrainingArguments,
        state: TrainerState,
        control: TrainerControl,
        model,
        **kwargs,
    ) -> None:
        checkpoint_dir = self.adapter_output_dir / f"checkpoint-{state.global_step}"
        checkpoint_dir.mkdir(parents=True, exist_ok=True)

        # model is a PeftModel — save only adapter weights
        if hasattr(model, "save_pretrained"):
            model.save_pretrained(str(checkpoint_dir))
            logger.info("Adapter checkpoint saved: %s", checkpoint_dir)
        else:
            logger.warning("Model does not have save_pretrained — skipping adapter checkpoint.")
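To make the callback behaviour concrete, here is a small self-contained simulation of the early-stopping logic; the WER sequence is invented for illustration and the adapter output path is only an example. In a real run both callbacks are passed through the Trainer's callbacks argument next to a compute_metrics function that returns {"wer": ...}, which the Trainer logs as eval_wer, the key EarlyStoppingOnWER reads.

# Illustrative simulation; WER values and paths are made up.
from transformers import TrainerControl, TrainerState, TrainingArguments

from src.training.callbacks import AdapterCheckpointCallback, EarlyStoppingOnWER

early_stop = EarlyStoppingOnWER(patience=2, min_delta=0.001)
adapter_ckpt = AdapterCheckpointCallback(adapter_output_dir="adapters/bambara")  # example path

args = TrainingArguments(output_dir="outputs/callback-demo")
state, control = TrainerState(), TrainerControl()

# Two consecutive evaluations without a >0.001 WER improvement set the stop flag.
for wer in [0.62, 0.55, 0.551, 0.552]:
    early_stop.on_evaluate(args, state, control, metrics={"eval_wer": wer})

print(control.should_training_stop)  # True

# During real training the wiring looks like:
#   Seq2SeqTrainer(..., compute_metrics=compute_metrics,
#                  callbacks=[early_stop, adapter_ckpt])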