RishiRP committed
Commit e372e2c · verified · 1 parent: ae411c8

Update app.py

Files changed (1): app.py (+161, -179)
app.py CHANGED
Old version (removed lines marked "-", unchanged context unmarked):

@@ -1,99 +1,113 @@
  # app.py
- # From Talk to Task — Batch & Single Task Extraction
- # Works on CPU / GPU / ZeroGPU. Uses a writable HF cache path (no /data).
- # If you want to use gated models (e.g., mistralai/Mistral-7B-Instruct-v0.2),
- # accept the license on HF and set HF_TOKEN in Space → Settings → Secrets.

  import os
  import io
  import re
- import sys
  import time
  import json
  import zipfile
  from pathlib import Path
- from typing import List, Dict, Tuple, Optional

  import gradio as gr

- # ====== Robust, writable HF cache ======
- # Avoid /data (read-only in Spaces). Prefer $HOME or /tmp.
  HOME = Path(os.environ.get("HOME", "/home/user"))
  CACHE_DIR = HOME / ".cache" / "huggingface"
  CACHE_DIR.mkdir(parents=True, exist_ok=True)
  os.environ.setdefault("HF_HOME", str(CACHE_DIR))
- # NOTE: TRANSFORMERS_CACHE is deprecated; HF_HOME is enough.
- os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")  # faster downloads when available

- HF_TOKEN = os.environ.get("HF_TOKEN", "").strip() or None

- # ====== Transformers safe import ======
  try:
      import torch
-     from transformers import (
-         AutoTokenizer,
-         AutoModelForCausalLM,
-         BitsAndBytesConfig,
-     )
  except Exception as e:
      raise RuntimeError(
-         "Failed to import transformers/torch. "
-         "Make sure requirements.txt includes: transformers>=4.41, torch, accelerate"
      ) from e

- DTYPE_FALLBACK = torch.float32
- if torch.cuda.is_available():
-     DTYPE_FALLBACK = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
-
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

- # ====== ZeroGPU (optional) ======
  try:
      import spaces  # noqa: F401
      ON_ZERO_GPU = True
  except Exception:
      ON_ZERO_GPU = False

- # ====== UI presets ======
- OPEN_MODEL_PRESETS = [
-     "HuggingFaceH4/zephyr-7b-beta",
-     "Qwen/Qwen2.5-7B-Instruct",
-     "tiiuae/falcon-7b-instruct",
- ]

- PINNED_REVISIONS = {
-     "HuggingFaceH4/zephyr-7b-beta": None,
-     "Qwen/Qwen2.5-7B-Instruct": None,
-     "tiiuae/falcon-7b-instruct": None,
-     # "mistralai/Mistral-7B-Instruct-v0.2": None,  # gated — use only if token + license ok
- }
-
- SYSTEM_INSTRUCTIONS = (
-     "You are a task extraction assistant. Always output valid JSON with a field "
-     '"labels" (list of strings). Use only from this set: '
-     '["plan_contact","schedule_meeting","update_contact_info_non_postal",'
-     '"update_contact_info_postal_address","update_kyc_activity","update_kyc_origin_of_assets",'
-     '"update_kyc_purpose_of_businessrelation","update_kyc_total_assets"]. '
-     "Return JSON only."
- )
-
- CONTEXT_GUIDE = """\
- - plan_contact: conversation without a concrete meeting (no date/time)
- - schedule_meeting: explicit date/time/modality confirmation
- - update_contact_info_non_postal: changes to email/phone
- - update_contact_info_postal_address: changes to mailing address
- - update_kyc_*: KYC updates (activity, purpose, origin of assets, total assets)
- """
-
- # ====== Utility ======
- def _json_only(text: str) -> str:
-     text = text.strip()
-     if text.startswith("{") and text.endswith("}"):
-         return text
-     m = re.search(r"\{.*\}", text, re.DOTALL)
      return m.group(0) if m else '{"labels": []}'

- def safe_json_loads(s: str) -> dict:
      try:
          return json.loads(s)
      except Exception:
@@ -101,78 +115,53 @@ def safe_json_loads(s: str) -> dict:

  def build_prompt(system: str, context: str, transcript: str) -> str:
      return (
-         f"### System:\n{system}\n\n"
-         f"### Context:\n{context}\n\n"
-         f"### Transcript:\n{transcript}\n\n"
-         "### Output:\nReturn JSON only."
      )

- # ====== Model wrapper ======
  class HFModel:
      def __init__(
          self,
          repo_id: str,
-         revision: Optional[str] = None,
-         load_in_4bit: bool = False,
-         trust_remote_code: bool = True,
-         dtype: Optional[torch.dtype] = None,
-         token: Optional[str] = None,
-     ) -> None:
          self.repo_id = repo_id
          self.revision = revision or "main"
-         self.trust_remote_code = trust_remote_code
          self.token = token
-         self.dtype = dtype or DTYPE_FALLBACK
          self.load_in_4bit = load_in_4bit and (DEVICE == "cuda")
          self.tokenizer = None
          self.model = None

      def load(self):
-         quant_cfg = None
-         if self.load_in_4bit:
-             quant_cfg = BitsAndBytesConfig(load_in_4bit=True)
-         try:
-             self.tokenizer = AutoTokenizer.from_pretrained(
-                 self.repo_id,
-                 revision=self.revision,
-                 token=self.token,
-                 cache_dir=str(CACHE_DIR),
-                 trust_remote_code=self.trust_remote_code,
-                 use_fast=True,
-             )
-         except Exception as e:
-             raise RuntimeError(
-                 f"Failed to load tokenizer for {self.repo_id} "
-                 "(If gated, accept license and set HF_TOKEN in Space → Settings → Secrets)."
-             ) from e
-
-         try:
-             self.model = AutoModelForCausalLM.from_pretrained(
-                 self.repo_id,
-                 revision=self.revision,
-                 token=self.token,
-                 cache_dir=str(CACHE_DIR),
-                 trust_remote_code=self.trust_remote_code,
-                 torch_dtype=self.dtype,
-                 device_map="auto" if DEVICE == "cuda" else None,
-                 quantization_config=quant_cfg,
-                 low_cpu_mem_usage=True,
-             )
-             if DEVICE == "cpu":
-                 self.model = self.model.to(DEVICE)
-         except Exception as e:
-             raise RuntimeError(
-                 f"Failed to load model weights for {self.repo_id}. "
-                 "Check license, token, and hardware availability."
-             ) from e

      @torch.inference_mode()
-     def generate(self, prompt: str, max_new_tokens: int = 256, temperature: float = 0.1) -> str:
          tok = self.tokenizer
          mdl = self.model
          if tok.pad_token is None:
              tok.pad_token = tok.eos_token
-
          inputs = tok(prompt, return_tensors="pt").to(mdl.device)
          out = mdl.generate(
              **inputs,
@@ -183,28 +172,29 @@ class HFModel:
              pad_token_id=tok.eos_token_id,
              eos_token_id=tok.eos_token_id,
          )
-         text = tok.decode(out[0], skip_special_tokens=True)
-         gen = text[len(prompt):].strip() if text.startswith(prompt) else text
-         return _json_only(gen)

- # ====== Model cache (per Space worker) ======
  _MODEL_CACHE: Dict[Tuple[str, Optional[str], bool], HFModel] = {}

  def get_model(repo_id: str, revision: Optional[str], load_in_4bit: bool) -> HFModel:
      key = (repo_id, revision, load_in_4bit)
      if key in _MODEL_CACHE:
          return _MODEL_CACHE[key]
-     model = HFModel(
-         repo_id=repo_id,
-         revision=revision,
-         load_in_4bit=load_in_4bit,
-         token=HF_TOKEN,
-     )
-     model.load()
-     _MODEL_CACHE[key] = model
-     return model

- # ====== Single transcript inference ======
  def run_single(
      model_choice: str,
      custom_repo_id: str,
@@ -217,18 +207,18 @@ def run_single(
      add_header: bool,
      strip_smalltalk: bool,
      load_in_4bit: bool,
- ) -> Tuple[str, str, str, str]:
      debug = []
      t0 = time.perf_counter()

-     repo = (custom_repo_id or model_choice).strip()
      rev = PINNED_REVISIONS.get(repo, None)
-     debug.append(f"Repo: {repo} | Revision: {rev or 'main'} | 4bit: {load_in_4bit} | Device: {DEVICE}")

      if preprocess:
          lines = [ln.rstrip() for ln in transcript.splitlines()]
          if strip_smalltalk:
-             lines = [ln for ln in lines if not re.search(r"\b(thanks?|bye|ok(ay)?)\b", ln, re.I)]
          transcript = "\n".join(lines[-32768:])
          if add_header:
              transcript = f"[EMAIL/MESSAGE SIGNAL]\n{transcript}"
@@ -242,31 +232,30 @@ def run_single(

      try:
          model = get_model(repo, rev, load_in_4bit)
-         raw = model.generate(prompt, max_new_tokens=256, temperature=0.1)
-         data = safe_json_loads(raw)
          out_json = json.dumps(data, ensure_ascii=False)
-         debug.append(f"Generation OK in {time.perf_counter()-t0:.2f}s")
          return repo, (rev or "main"), out_json, "\n".join(debug)
      except Exception as e:
          debug.append(f"ERROR: {e}")
          return repo, (rev or "main"), json.dumps({"labels": []}), "\n".join(debug)

- # ====== Batch (ZIP of many .txt files) ======
  def run_batch(
      model_choice: str,
      custom_repo_id: str,
      system: str,
      context: str,
-     zip_file: Optional[io.BytesIO],
      soft_token_cap: int,
      preprocess: bool,
      lines_window: int,
      add_header: bool,
      strip_smalltalk: bool,
      load_in_4bit: bool,
- ) -> Tuple[str, str, str, str]:
      debug = []
-     repo = (custom_repo_id or model_choice).strip()
      rev = PINNED_REVISIONS.get(repo, None)

      if not zip_file:
@@ -275,12 +264,13 @@ def run_batch(
      try:
          z = zipfile.ZipFile(zip_file)
          names = [n for n in z.namelist() if n.lower().endswith(".txt")]
-         debug.append(f"Files detected: {len(names)}")
      except Exception as e:
          return repo, (rev or "main"), "filename,labels\n", f"Bad ZIP: {e}"

      try:
-         model = get_model(repo, rev, load_in_4bit)
      except Exception as e:
          return repo, (rev or "main"), "filename,labels\n", f"Model load error: {e}"

@@ -288,46 +278,43 @@ def run_batch(
      for name in names:
          try:
              txt = z.read(name).decode("utf-8", errors="replace")
-             _, _, labels_json, _ = run_single(
                  model_choice, custom_repo_id, system, context, txt,
-                 soft_token_cap, preprocess, lines_window, add_header,
-                 strip_smalltalk, load_in_4bit
              )
-             labels = safe_json_loads(labels_json).get("labels", [])
              rows.append(f"{name},{json.dumps(labels, ensure_ascii=False)}")
          except Exception as e:
              rows.append(f"{name},[] # error: {e}")

      return repo, (rev or "main"), "\n".join(rows), "\n".join(debug)

- # ====== Gradio UI ======
- with gr.Blocks(title="From Talk to Task — Batch & Single Task Extraction") as demo:
      gr.Markdown(
-         """
-         # From Talk to Task — Batch & Single Task Extraction

-         **Tip:** Use **open models** first (no gating). If you pick a gated model, make sure
-         you have accepted its license _and_ set `HF_TOKEN` in **Settings → Secrets**.

-         **Pinned revisions:** {}
-         """.format(
-             ", ".join([f"{k}@{v or 'main'}" for k, v in PINNED_REVISIONS.items()])
-         )
      )

      with gr.Row():
          model_choice = gr.Dropdown(
-             OPEN_MODEL_PRESETS,
-             label="Model (Open presets no gating)",
-             value=OPEN_MODEL_PRESETS[0],
          )
-         custom_repo_id = gr.Textbox(
              label="Custom model repo id (overrides preset)",
-             placeholder="e.g. mistralai/Mistral-7B-Instruct-v0.2 (requires license + HF_TOKEN)"
          )

-     system = gr.Textbox(label="Instructions (System)", value=SYSTEM_INSTRUCTIONS, lines=5)
-     context = gr.Textbox(label="Context (User prefix before transcript)", value=CONTEXT_GUIDE, lines=6)

      with gr.Row():
          soft_cap = gr.Slider(1024, 32768, value=8192, step=1, label="Soft token cap")
@@ -339,22 +326,21 @@ with gr.Blocks(title="From Talk to Task — Batch & Single Task Extraction") as demo:
          load_4bit = gr.Checkbox(value=False, label="Load in 4-bit (GPU only)")

      with gr.Tabs():
-         with gr.Tab("Single Transcript (default)"):
-             transcript = gr.Textbox(label="Paste transcript text", lines=12, placeholder="Paste your transcript here...")
              run_btn = gr.Button("Run (Single)", variant="primary")
              repo_used = gr.Textbox(label="Repo used", interactive=False)
              rev_used = gr.Textbox(label="Revision", interactive=False)
              json_out = gr.Code(label="JSON Output", language="json")
              debug_out = gr.Textbox(label="Diagnostics", lines=6)

-             def _run_single(*args):
-                 r, v, j, d = run_single(*args)
-                 return r, v, j, d

              run_btn.click(
-                 _run_single,
                  inputs=[
-                     model_choice, custom_repo_id, system, context, transcript,
                      soft_cap, preprocess, lines_window, add_header, strip_smalltalk, load_4bit
                  ],
                  outputs=[repo_used, rev_used, json_out, debug_out],
@@ -365,29 +351,25 @@ with gr.Blocks(title="From Talk to Task — Batch & Single Task Extraction") as demo:
              run_batch_btn = gr.Button("Run (Batch)", variant="primary")
              repo_used_b = gr.Textbox(label="Repo used", interactive=False)
              rev_used_b = gr.Textbox(label="Revision", interactive=False)
-             # FIX: use Textbox for CSV; Code(language="text") is not supported.
              csv_out = gr.Textbox(label="CSV (filename,labels)", lines=12)
              debug_out_b = gr.Textbox(label="Diagnostics", lines=6)

-             def _run_batch(*args):
-                 r, v, c, d = run_batch(*args)
-                 return r, v, c, d

              run_batch_btn.click(
-                 _run_batch,
                  inputs=[
-                     model_choice, custom_repo_id, system, context, zip_in,
                      soft_cap, preprocess, lines_window, add_header, strip_smalltalk, load_4bit
                  ],
                  outputs=[repo_used_b, rev_used_b, csv_out, debug_out_b],
              )

      gr.Markdown(
-         f"""
-         - **HF_TOKEN detected:** {"✅ yes" if HF_TOKEN else "⚠️ no (only needed for gated models)"}
-         - **Device:** {DEVICE}
-         - **Cache dir:** `{CACHE_DIR}`
-         """
      )

  if __name__ == "__main__":
  # app.py
2
+ # From Talk to Task — Batch & Single Task Extraction (Multilingual: EN/FR/DE/IT)
3
+ # Default model: Swiss Apertus instruct (set APERTUS_REPO below).
4
+ # Works on CPU / GPU / ZeroGPU. Uses a writable HF cache. JSON-only outputs.
 
5
 
6
  import os
7
  import io
8
  import re
 
9
  import time
10
  import json
11
  import zipfile
12
  from pathlib import Path
13
+ from typing import Dict, Tuple, Optional
14
 
15
  import gradio as gr
16
 
17
+ # --------------------------- CONFIG ---------------------------------
18
+
19
+ # <<< SET THIS TO YOUR APERTUS MODEL REPO ID >>>
20
+ # Example: "ApertusAI/swiss-apertus-7b-instruct" (replace with your actual repo id)
21
+ APERTUS_REPO = "swiss-ai/Apertus-8B-Instruct-2509"
22
+
23
+ # Optional: fallback open models (no gating) to sanity-check UI quickly
24
+ OPEN_FALLBACKS = [
25
+ "HuggingFaceH4/zephyr-7b-beta",
26
+ "Qwen/Qwen2.5-7B-Instruct",
27
+ "tiiuae/falcon-7b-instruct",
28
+ ]
29
+
30
+ PINNED_REVISIONS = {
31
+ # None => "main"
32
+ # Put your Apertus revision here if you want to pin it:
33
+ APERTUS_REPO: None,
34
+ "HuggingFaceH4/zephyr-7b-beta": None,
35
+ "Qwen/Qwen2.5-7B-Instruct": None,
36
+ "tiiuae/falcon-7b-instruct": None,
37
+ }
38
+
39
+ # Multilingual, but labels must be English and from this fixed set:
40
+ LABEL_SET = [
41
+ "plan_contact",
42
+ "schedule_meeting",
43
+ "update_contact_info_non_postal",
44
+ "update_contact_info_postal_address",
45
+ "update_kyc_activity",
46
+ "update_kyc_origin_of_assets",
47
+ "update_kyc_purpose_of_businessrelation",
48
+ "update_kyc_total_assets",
49
+ ]
50
+
51
+ SYSTEM_INSTRUCTIONS = (
52
+ "You are a task extraction assistant.\n"
53
+ "Input transcript language can be English, French, German, or Italian. "
54
+ "You MUST output valid JSON ONLY (no prose), with a single field:\n"
55
+ '"labels": a list of strings chosen ONLY from the set:\n'
56
+ f"{LABEL_SET}\n"
57
+ "Do not invent other fields. Do not translate labels. Return JSON only."
58
+ )
59
+
60
+ CONTEXT_GUIDE = (
61
+ "- plan_contact: contact without firm date/time\n"
62
+ "- schedule_meeting: explicit date/time/modality confirmed\n"
63
+ "- update_contact_info_non_postal: email/phone updates\n"
64
+ "- update_contact_info_postal_address: mailing address updates\n"
65
+ "- update_kyc_*: KYC updates (activity, purpose, origin of assets, total assets)\n"
66
+ )
67
+
68
+ # --------------------- WRITABLE HF CACHE -----------------------------
69
+
70
  HOME = Path(os.environ.get("HOME", "/home/user"))
71
  CACHE_DIR = HOME / ".cache" / "huggingface"
72
  CACHE_DIR.mkdir(parents=True, exist_ok=True)
73
  os.environ.setdefault("HF_HOME", str(CACHE_DIR))
74
+ os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1") # faster downloads when supported
75
+
76
+ HF_TOKEN = (os.environ.get("HF_TOKEN") or "").strip() or None
77
 
78
+ # -------------------- TRANSFORMERS / TORCH ---------------------------
79
 
 
80
  try:
81
  import torch
82
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
 
 
 
83
  except Exception as e:
84
  raise RuntimeError(
85
+ "Missing deps. In requirements.txt include: transformers>=4.41, torch, accelerate, huggingface_hub"
 
86
  ) from e
87
 
 
 
 
 
88
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
89
+ DTYPE_FALLBACK = (
90
+ torch.bfloat16 if (DEVICE == "cuda" and torch.cuda.is_bf16_supported()) else
91
+ (torch.float16 if DEVICE == "cuda" else torch.float32)
92
+ )
93
 
94
+ # ZeroGPU presence (optional)
95
  try:
96
  import spaces # noqa: F401
97
  ON_ZERO_GPU = True
98
  except Exception:
99
  ON_ZERO_GPU = False
100
 
101
+ # -------------------------- HELPERS ---------------------------------
 
 
 
 
 
102
 
103
+ def _json_from_text(text: str) -> str:
104
+ s = text.strip()
105
+ if s.startswith("{") and s.endswith("}"):
106
+ return s
107
+ m = re.search(r"\{.*\}", s, re.DOTALL)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  return m.group(0) if m else '{"labels": []}'
109
 
110
+ def safe_json(s: str) -> dict:
111
  try:
112
  return json.loads(s)
113
  except Exception:
 
115
 
116
  def build_prompt(system: str, context: str, transcript: str) -> str:
117
  return (
118
+ f"### System\n{system}\n\n"
119
+ f"### Context\n{context}\n\n"
120
+ f"### Transcript\n{transcript}\n\n"
121
+ "### Output\nReturn JSON only."
122
  )
123
 
124
+ # -------------------------- MODEL -----------------------------------
125
+
126
  class HFModel:
127
  def __init__(
128
  self,
129
  repo_id: str,
130
+ revision: Optional[str],
131
+ token: Optional[str],
132
+ load_in_4bit: bool,
133
+ dtype
134
+ ):
 
135
  self.repo_id = repo_id
136
  self.revision = revision or "main"
 
137
  self.token = token
 
138
  self.load_in_4bit = load_in_4bit and (DEVICE == "cuda")
139
+ self.dtype = dtype
140
  self.tokenizer = None
141
  self.model = None
142
 
143
  def load(self):
144
+ qcfg = BitsAndBytesConfig(load_in_4bit=True) if self.load_in_4bit else None
145
+ self.tokenizer = AutoTokenizer.from_pretrained(
146
+ self.repo_id, revision=self.revision, token=self.token,
147
+ cache_dir=str(CACHE_DIR), use_fast=True, trust_remote_code=True
148
+ )
149
+ self.model = AutoModelForCausalLM.from_pretrained(
150
+ self.repo_id, revision=self.revision, token=self.token,
151
+ cache_dir=str(CACHE_DIR), trust_remote_code=True,
152
+ torch_dtype=self.dtype,
153
+ device_map="auto" if DEVICE == "cuda" else None,
154
+ quantization_config=qcfg, low_cpu_mem_usage=True
155
+ )
156
+ if DEVICE == "cpu":
157
+ self.model = self.model.to(DEVICE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
  @torch.inference_mode()
160
+ def generate(self, prompt: str, max_new_tokens=256, temperature=0.1) -> str:
161
  tok = self.tokenizer
162
  mdl = self.model
163
  if tok.pad_token is None:
164
  tok.pad_token = tok.eos_token
 
165
  inputs = tok(prompt, return_tensors="pt").to(mdl.device)
166
  out = mdl.generate(
167
  **inputs,
 
172
  pad_token_id=tok.eos_token_id,
173
  eos_token_id=tok.eos_token_id,
174
  )
175
+ decoded = tok.decode(out[0], skip_special_tokens=True)
176
+ gen = decoded[len(prompt):].strip() if decoded.startswith(prompt) else decoded
177
+ return _json_from_text(gen)
178
 
 
179
  _MODEL_CACHE: Dict[Tuple[str, Optional[str], bool], HFModel] = {}
180
 
181
  def get_model(repo_id: str, revision: Optional[str], load_in_4bit: bool) -> HFModel:
182
  key = (repo_id, revision, load_in_4bit)
183
  if key in _MODEL_CACHE:
184
  return _MODEL_CACHE[key]
185
+ mdl = HFModel(repo_id, revision, HF_TOKEN, load_in_4bit, DTYPE_FALLBACK)
186
+ try:
187
+ mdl.load()
188
+ except Exception as e:
189
+ raise RuntimeError(
190
+ f"Model load failed for {repo_id}@{revision or 'main'} — "
191
+ "If this is a gated/private model, ensure you accepted its license and set HF_TOKEN."
192
+ ) from e
193
+ _MODEL_CACHE[key] = mdl
194
+ return mdl
195
+
196
+ # ---------------------- INFERENCE ROUTES ----------------------------
197
 
 
198
  def run_single(
199
  model_choice: str,
200
  custom_repo_id: str,
 
207
  add_header: bool,
208
  strip_smalltalk: bool,
209
  load_in_4bit: bool,
210
+ ):
211
  debug = []
212
  t0 = time.perf_counter()
213
 
214
+ repo = (custom_repo_id or model_choice or APERTUS_REPO).strip()
215
  rev = PINNED_REVISIONS.get(repo, None)
216
+ debug.append(f"Repo: {repo} | Rev: {rev or 'main'} | Dev: {DEVICE} | 4bit: {load_in_4bit}")
217
 
218
  if preprocess:
219
  lines = [ln.rstrip() for ln in transcript.splitlines()]
220
  if strip_smalltalk:
221
+ lines = [ln for ln in lines if not re.search(r"\b(thanks?|merci|grazie|danke|bye|tsch(ü|u)ss|ciao|ok(ay)?)\b", ln, re.I)]
222
  transcript = "\n".join(lines[-32768:])
223
  if add_header:
224
  transcript = f"[EMAIL/MESSAGE SIGNAL]\n{transcript}"
 
232
 
233
  try:
234
  model = get_model(repo, rev, load_in_4bit)
235
+ raw = model.generate(prompt)
236
+ data = safe_json(raw)
237
  out_json = json.dumps(data, ensure_ascii=False)
238
+ debug.append(f"Done in {time.perf_counter()-t0:.2f}s")
239
  return repo, (rev or "main"), out_json, "\n".join(debug)
240
  except Exception as e:
241
  debug.append(f"ERROR: {e}")
242
  return repo, (rev or "main"), json.dumps({"labels": []}), "\n".join(debug)
243
 
 
244
  def run_batch(
245
  model_choice: str,
246
  custom_repo_id: str,
247
  system: str,
248
  context: str,
249
+ zip_file,
250
  soft_token_cap: int,
251
  preprocess: bool,
252
  lines_window: int,
253
  add_header: bool,
254
  strip_smalltalk: bool,
255
  load_in_4bit: bool,
256
+ ):
257
  debug = []
258
+ repo = (custom_repo_id or model_choice or APERTUS_REPO).strip()
259
  rev = PINNED_REVISIONS.get(repo, None)
260
 
261
  if not zip_file:
 
264
  try:
265
  z = zipfile.ZipFile(zip_file)
266
  names = [n for n in z.namelist() if n.lower().endswith(".txt")]
267
+ debug.append(f"Files: {len(names)}")
268
  except Exception as e:
269
  return repo, (rev or "main"), "filename,labels\n", f"Bad ZIP: {e}"
270
 
271
+ # Warm model once
272
  try:
273
+ _ = get_model(repo, rev, load_in_4bit)
274
  except Exception as e:
275
  return repo, (rev or "main"), "filename,labels\n", f"Model load error: {e}"
276
 
 
278
  for name in names:
279
  try:
280
  txt = z.read(name).decode("utf-8", errors="replace")
281
+ _, _, j, _ = run_single(
282
  model_choice, custom_repo_id, system, context, txt,
283
+ soft_token_cap, preprocess, lines_window, add_header, strip_smalltalk, load_in_4bit
 
284
  )
285
+ labels = safe_json(j).get("labels", [])
286
  rows.append(f"{name},{json.dumps(labels, ensure_ascii=False)}")
287
  except Exception as e:
288
  rows.append(f"{name},[] # error: {e}")
289
 
290
  return repo, (rev or "main"), "\n".join(rows), "\n".join(debug)
291
 
292
+ # ----------------------------- UI -----------------------------------
293
+
294
+ with gr.Blocks(title="From Talk to Task — Multilingual (EN/FR/DE/IT)") as demo:
295
  gr.Markdown(
296
+ f"""
297
+ # From Talk to Task — Multilingual (EN/FR/DE/IT)
298
 
299
+ **Default model:** `{APERTUS_REPO or 'PLEASE SET APERTUS_REPO'}`
300
+ You can override with a custom repo id below.
301
 
302
+ Pinned revisions: {", ".join([f"{k}@{v or 'main'}" for k, v in PINNED_REVISIONS.items()])}
303
+ """
 
 
304
  )
305
 
306
  with gr.Row():
307
  model_choice = gr.Dropdown(
308
+ [APERTUS_REPO] + OPEN_FALLBACKS, label="Model presets",
309
+ value=APERTUS_REPO if APERTUS_REPO else (OPEN_FALLBACKS[0] if OPEN_FALLBACKS else "")
 
310
  )
311
+ custom_repo = gr.Textbox(
312
  label="Custom model repo id (overrides preset)",
313
+ placeholder="e.g. ApertusAI/swiss-apertus-7b-instruct (requires license + HF_TOKEN if gated)"
314
  )
315
 
316
+ system = gr.Textbox(label="Instructions (System)", value=SYSTEM_INSTRUCTIONS, lines=6)
317
+ context = gr.Textbox(label="Context (User prefix)", value=CONTEXT_GUIDE, lines=6)
318
 
319
  with gr.Row():
320
  soft_cap = gr.Slider(1024, 32768, value=8192, step=1, label="Soft token cap")
 
326
  load_4bit = gr.Checkbox(value=False, label="Load in 4-bit (GPU only)")
327
 
328
  with gr.Tabs():
329
+ with gr.Tab("Single Transcript"):
330
+ transcript = gr.Textbox(label="Paste transcript (EN/FR/DE/IT)", lines=12)
331
  run_btn = gr.Button("Run (Single)", variant="primary")
332
  repo_used = gr.Textbox(label="Repo used", interactive=False)
333
  rev_used = gr.Textbox(label="Revision", interactive=False)
334
  json_out = gr.Code(label="JSON Output", language="json")
335
  debug_out = gr.Textbox(label="Diagnostics", lines=6)
336
 
337
+ def _single(*args):
338
+ return run_single(*args)
 
339
 
340
  run_btn.click(
341
+ _single,
342
  inputs=[
343
+ model_choice, custom_repo, system, context, transcript,
344
  soft_cap, preprocess, lines_window, add_header, strip_smalltalk, load_4bit
345
  ],
346
  outputs=[repo_used, rev_used, json_out, debug_out],
 
351
  run_batch_btn = gr.Button("Run (Batch)", variant="primary")
352
  repo_used_b = gr.Textbox(label="Repo used", interactive=False)
353
  rev_used_b = gr.Textbox(label="Revision", interactive=False)
 
354
  csv_out = gr.Textbox(label="CSV (filename,labels)", lines=12)
355
  debug_out_b = gr.Textbox(label="Diagnostics", lines=6)
356
 
357
+ def _batch(*args):
358
+ return run_batch(*args)
 
359
 
360
  run_batch_btn.click(
361
+ _batch,
362
  inputs=[
363
+ model_choice, custom_repo, system, context, zip_in,
364
  soft_cap, preprocess, lines_window, add_header, strip_smalltalk, load_4bit
365
  ],
366
  outputs=[repo_used_b, rev_used_b, csv_out, debug_out_b],
367
  )
368
 
369
  gr.Markdown(
370
+ f"- **HF_TOKEN:** {'✅ set' if HF_TOKEN else '⚠️ not set (only needed for gated/private)'} \n"
371
+ f"- **Device:** {DEVICE} \n"
372
+ f"- **Cache dir:** `{CACHE_DIR}`"
 
 
373
  )
374
 
375
  if __name__ == "__main__":
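
For quick reference, the JSON-only parsing path that this commit introduces can be exercised on its own. The sketch below adapts the `_json_from_text` and `safe_json` helpers from the new app.py (the fallback return in the `except` branch is assumed, since the diff truncates it), and the sample model output is invented for illustration; real output comes from `HFModel.generate()`.

# Illustrative only: exercises the JSON-only output contract outside the Space.
import json
import re

def _json_from_text(text: str) -> str:
    # Keep the first {...} block if the model wrapped the JSON in prose.
    s = text.strip()
    if s.startswith("{") and s.endswith("}"):
        return s
    m = re.search(r"\{.*\}", s, re.DOTALL)
    return m.group(0) if m else '{"labels": []}'

def safe_json(s: str) -> dict:
    # Assumed fallback: never raise, return an empty label list instead.
    try:
        return json.loads(s)
    except Exception:
        return {"labels": []}

# Hypothetical raw generation with prose around the JSON.
raw = 'Sure! Here is the result:\n{"labels": ["schedule_meeting"]}\nDone.'
print(safe_json(_json_from_text(raw))["labels"])  # -> ['schedule_meeting']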