jcfossati committed on
Commit
d409ebd
·
verified ·
1 Parent(s): 81410ef

sync from GitHub repo (space/)

Browse files
Files changed (7) hide show
  1. .gitattributes +1 -0
  2. README.md +30 -2
  3. __pycache__/app.cpython-314.pyc +0 -0
  4. app.py +191 -0
  5. config.json +9 -0
  6. requirements.txt +5 -0
  7. tokenizer.json +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -4,10 +4,38 @@ emoji: 🏆
4
  colorFrom: green
5
  colorTo: green
6
  sdk: gradio
7
- sdk_version: 6.13.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
 
 
 
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  colorFrom: green
5
  colorTo: green
6
  sdk: gradio
7
+ sdk_version: 5.0.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
+ short_description: Ultra-fast yes/no/unknown classifier (FR+EN), 2ms CPU
12
+ models:
13
+ - jcfossati/ForSureLLM
14
  ---
15
 
16
+ # ForSureLLM interactive demo
17
+
18
+ This Space hosts the live demo of [ForSureLLM](https://github.com/jcfossati/ForSureLLM),
19
+ a 113 MB MiniLM-L12 multilingual model distilled from Claude Sonnet for
20
+ classifying short French/English phrases as `yes` / `no` / `unknown`.
21
+
22
+ The ONNX checkpoint is loaded from the
23
+ [jcfossati/ForSureLLM](https://huggingface.co/jcfossati/ForSureLLM) Model
24
+ repo at startup. Tokenizer and config are bundled in the Space.
25
+
26
+ ## Numbers
27
+
28
+ | Metric | Value |
29
+ |---|---|
30
+ | Adversarial accuracy (124 cases) | **95.2 %** |
31
+ | vs Haiku 4.5 zero-shot | **+20.2 pts** |
32
+ | vs Cosine MiniLM-L12 | **+27.5 pts** |
33
+ | Latency p50 (CPU) | 1.8 ms |
34
+ | Model size | 113 MB |
35
+
36
+ ## Source
37
+
38
+ App and tokenizer/config files are mirrored from
39
+ [`space/`](https://github.com/jcfossati/ForSureLLM/tree/main/space)
40
+ in the GitHub repo. Update via `python tools/deploy_space.py` after each
41
+ model retrain.
__pycache__/app.cpython-314.pyc ADDED
Binary file (12.1 kB). View file
 
app.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gradio demo for ForSureLLM hosted on HuggingFace Spaces.
2
+
3
+ Loads the ONNX model from the Model repo (jcfossati/ForSureLLM) at startup,
4
+ keeps a small inference function in memory, and exposes a simple yes/no/unknown
5
+ classifier UI with click-to-try examples.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import re
11
+ import time
12
+ import unicodedata
13
+ from pathlib import Path
14
+
15
+ import gradio as gr
16
+ import numpy as np
17
+ import onnxruntime as ort
18
+ from huggingface_hub import hf_hub_download
19
+ from tokenizers import Tokenizer
20
+
21
MODEL_REPO = "jcfossati/ForSureLLM"
ONNX_FILE = "forsurellm-int8.onnx"

# --- Load artefacts ---------------------------------------------------------
# tokenizer.json and config.json are read from the directory containing this
# file; only the ONNX weights are fetched with hf_hub_download.
ROOT = Path(__file__).parent
TOKENIZER = Tokenizer.from_file(str(ROOT / "tokenizer.json"))
CFG = json.loads((ROOT / "config.json").read_text(encoding="utf-8"))
TOKENIZER.enable_truncation(max_length=CFG["max_length"])
CLASSES = CFG["classes"]
# Logits are divided by this value before the softmax; 1.0 when not configured.
TEMPERATURE = float(CFG.get("temperature", 1.0))

print(f"[boot] downloading {ONNX_FILE} from {MODEL_REPO}...")
ONNX_PATH = hf_hub_download(repo_id=MODEL_REPO, filename=ONNX_FILE)
SESSION = ort.InferenceSession(ONNX_PATH, providers=["CPUExecutionProvider"])
# Names of the graph inputs, used to decide which tensors to feed.
INPUT_NAMES = {graph_input.name for graph_input in SESSION.get_inputs()}
print(f"[boot] ready (model {Path(ONNX_PATH).stat().st_size / 1024 / 1024:.0f} MB)")
38
+
39
+ # --- Preprocessing (mirror forsurellm/classifier.py) ------------------------
40
# A word character that is neither a decimal digit nor an underscore — in
# practice, a letter. Used to detect input that contains no text at all.
_HAS_LETTER_RE = re.compile(r"[^\W\d_]", re.UNICODE)
# Any run of whitespace.
_WS_RE = re.compile(r"\s+")
# Whole-string numeric shapes. Numbers may use "." or "," as decimal separator.
# "n / d" with an optionally signed numerator.
_FRACTION_RE = re.compile(r"^([+-]?\d+(?:[.,]\d+)?)\s*/\s*(\d+(?:[.,]\d+)?)$")
# "n %" with an optional sign.
_PERCENT_RE = re.compile(r"^([+-]?\d+(?:[.,]\d+)?)\s*%$")
# A mandatory sign followed by an integer, e.g. "+1" or "- 2".
_SIGNED_INT_RE = re.compile(r"^([+-])\s*(\d+)$")
# Exact-match tokens that map straight to a label.
_SYMBOLIC_YES = {"👍", "👍👍", "✅", "🆗", "💯", "💯💯", "++", "+", "✓", "✔", "✔️", "☑", "☑️"}
_SYMBOLIC_NO = {"👎", "👎👎", "❌", "🚫", "⛔", "🛑", "--", "✗", "✘", "✖", "✖️", "≠"}
_SYMBOLIC_UNK = {"?", "??", "???", "?!", "?!?", "🤷", "🤔", "😐", "😶", r"¯\_(ツ)_/¯"}
48
+
49
+
50
def _normalize(s: str) -> str:
    """Return ``s`` in the canonical form fed to the tokenizer.

    The text is NFC-normalized, each run of whitespace is collapsed to a
    single space, surrounding whitespace is removed, and the result is
    lowercased.
    """
    composed = unicodedata.normalize("NFC", s)
    collapsed = _WS_RE.sub(" ", composed)
    return collapsed.strip().lower()
54
+
55
+
56
def _classify_symbolic(s: str) -> tuple[str, float] | None:
    """Classify purely symbolic input with deterministic rules.

    Recognized forms, checked in this order: an exact token from the symbol
    sets, a fraction ("10/10"), a percentage ("100%"), and a signed integer
    ("+1"). Every match is reported with confidence 1.0.

    Args:
        s: Raw input; surrounding whitespace is ignored.

    Returns:
        A ``(label, confidence)`` pair, or ``None`` when ``s`` is blank or
        matches none of the symbolic forms.
    """

    def from_ratio(ratio: float) -> tuple[str, float]:
        # 0.7 and above reads as agreement, 0.3 and below as refusal,
        # anything in between is undecided.
        if ratio >= 0.7:
            return "yes", 1.0
        if ratio <= 0.3:
            return "no", 1.0
        return "unknown", 1.0

    def to_float(text: str) -> float:
        # Accept a decimal comma as well as a decimal point.
        return float(text.replace(",", "."))

    token = s.strip()
    if not token:
        return None

    for label, members in (
        ("yes", _SYMBOLIC_YES),
        ("no", _SYMBOLIC_NO),
        ("unknown", _SYMBOLIC_UNK),
    ):
        if token in members:
            return label, 1.0

    fraction = _FRACTION_RE.match(token)
    if fraction:
        try:
            numerator = to_float(fraction.group(1))
            denominator = to_float(fraction.group(2))
        except ValueError:
            return None
        if denominator == 0:
            # "n/0" has no meaningful ratio.
            return "unknown", 1.0
        return from_ratio(numerator / denominator)

    percent = _PERCENT_RE.match(token)
    if percent:
        try:
            share = to_float(percent.group(1)) / 100.0
        except ValueError:
            return None
        return from_ratio(share)

    signed = _SIGNED_INT_RE.match(token)
    if signed:
        sign = signed.group(1)
        magnitude = int(signed.group(2))
        if magnitude == 0:
            # "+0" / "-0" lean neither way.
            return "unknown", 1.0
        return ("yes", 1.0) if sign == "+" else ("no", 1.0)

    return None
90
+
91
+
92
+ def _softmax(x: np.ndarray) -> np.ndarray:
93
+ x = x - x.max(axis=-1, keepdims=True)
94
+ e = np.exp(x)
95
+ return e / e.sum(axis=-1, keepdims=True)
96
+
97
+
98
def _label_distribution(label: str, conf: float) -> np.ndarray:
    """Return a probability vector over CLASSES with ``conf`` on ``label``.

    The remaining mass ``1 - conf`` is split evenly across the other classes.
    """
    probs = np.zeros(len(CLASSES))
    others = len(CLASSES) - 1
    if others:
        probs[:] = (1 - conf) / others
    probs[CLASSES.index(label)] = conf
    return probs


def classify(phrase: str) -> tuple[str, np.ndarray]:
    """Classify ``phrase`` as one of CLASSES.

    Symbolic input is resolved by ``_classify_symbolic`` and input that
    contains no letter at all is reported as "unknown"; neither case runs the
    model. Everything else is normalized, tokenized and scored by the ONNX
    session, with logits divided by TEMPERATURE before the softmax.

    The rule-based branches build their probability vector from CLASSES
    instead of assuming three classes with "unknown" in last position, so the
    vector stays aligned with whatever order config.json declares.

    Args:
        phrase: Raw user text. ``None`` and ``""`` are treated as empty input.

    Returns:
        ``(label, probs)`` where ``probs`` holds one entry per class, in
        CLASSES order.

    Raises:
        ValueError: if a rule-based label is not listed in CLASSES.
    """
    text = phrase or ""

    sym = _classify_symbolic(text)
    if sym is not None:
        label, conf = sym
        return label, _label_distribution(label, conf)

    if not _HAS_LETTER_RE.search(text):
        # Digits/punctuation only and not a known symbol: nothing to read.
        return "unknown", _label_distribution("unknown", 1.0)

    enc = TOKENIZER.encode(_normalize(text))
    candidates = {
        "input_ids": enc.ids,
        "attention_mask": enc.attention_mask,
        "token_type_ids": enc.type_ids,
    }
    # Feed only the tensors the exported graph declares as inputs; each one
    # is a batch of size one.
    feeds = {
        name: np.array([values], dtype=np.int64)
        for name, values in candidates.items()
        if name in INPUT_NAMES
    }
    # First output, first (only) row of the batch.
    logits = SESSION.run(None, feeds)[0][0]
    probs = _softmax(logits / TEMPERATURE)
    label = CLASSES[int(probs.argmax())]
    return label, probs
120
+
121
+
122
+ # --- UI helpers -------------------------------------------------------------
123
# Badge text and badge colour for each label.
LABEL_EMOJI = dict(yes="✅ YES", no="❌ NO", unknown="❓ UNKNOWN")
LABEL_COLOR = dict(yes="#22c55e", no="#ef4444", unknown="#a3a3a3")

# Click-to-try phrases; each row holds the value for the single text input.
EXAMPLES = [
    [sample]
    for sample in (
        "carrément",
        "tu rêves",
        "np",
        "oh toootally",
        "bah oui",
        "+1",
        "is the pope catholic",
        "je passe",
        "yes mais non",
        "no cap",
        "mouais bof",
        "100%",
        "if I must",
        "nan nan jamais",
    )
]
142
+
143
+
144
def predict(phrase: str) -> tuple[str, dict, str]:
    """Run the classifier and format its result for the three UI outputs.

    Args:
        phrase: Text typed by the user.

    Returns:
        ``(badge_html, distribution, timing_html)``. ``distribution`` maps
        each class name to its probability. Blank input yields the
        placeholder ``("—", {}, "")`` without calling the classifier.
    """
    if not (phrase and phrase.strip()):
        return "—", {}, ""

    started = time.perf_counter()
    label, probs = classify(phrase)
    elapsed_ms = (time.perf_counter() - started) * 1000

    badge = f"<div style='font-size:48px;font-weight:700;color:{LABEL_COLOR[label]};text-align:center'>{LABEL_EMOJI[label]}</div>"
    timing = f"<div style='text-align:center;color:#888;font-size:12px;margin-top:8px'>inférence : {elapsed_ms:.1f} ms</div>"
    # Plain floats keyed by class name, in CLASSES order.
    dist = dict(zip(CLASSES, map(float, probs)))
    return badge, dist, timing
154
+
155
+
156
+ # --- Layout -----------------------------------------------------------------
157
# Markdown shown at the top of the page.
DESCRIPTION = """
# ForSureLLM

Classifier yes/no/unknown ultra-rapide pour réponses courtes (FR + EN). Distillé de Claude Sonnet vers MiniLM-L12 multilingue.

- **95.2 %** sur 124 phrases adversarial (vs Haiku 4.5 zero-shot **75 %**, vs Cosine MiniLM **68 %**)
- **~2 ms** sur CPU, **113 MB** quantifié int8, **+20 pts** vs Haiku
- Préprocesseurs déterministes pour symboles (`+1`, `100%`, `10/10`, `👍`...)

[GitHub](https://github.com/jcfossati/ForSureLLM) · [Model](https://huggingface.co/jcfossati/ForSureLLM)
"""

with gr.Blocks(title="ForSureLLM", theme=gr.themes.Soft()) as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        # Input column: the phrase and the button that submits it.
        with gr.Column(scale=2):
            inp = gr.Textbox(
                label="Phrase à classer",
                placeholder="Tape une phrase courte en français ou anglais",
                lines=2,
                autofocus=True,
            )
            btn = gr.Button("Classer", variant="primary")
        # Result column: the three components filled by predict().
        with gr.Column(scale=3):
            badge = gr.HTML(label="Résultat")
            timing = gr.HTML()
            dist = gr.Label(label="Distribution de probabilités", num_top_classes=3)

    gr.Examples(examples=EXAMPLES, inputs=[inp], label="Exemples (clic pour tester)")

    # Submitting the textbox and clicking the button run the same handler.
    inp.submit(predict, inputs=[inp], outputs=[badge, dist, timing])
    btn.click(predict, inputs=[inp], outputs=[badge, dist, timing])

if __name__ == "__main__":
    demo.launch()
config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "classes": [
3
+ "yes",
4
+ "no",
5
+ "unknown"
6
+ ],
7
+ "max_length": 64,
8
+ "temperature": 0.6799102425575256
9
+ }
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio>=4.0,<6.0
2
+ onnxruntime>=1.16
3
+ tokenizers>=0.15
4
+ huggingface_hub>=0.20
5
+ numpy>=1.24
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cad551d5600a84242d0973327029452a1e3672ba6313c2a3c3d69c4310e12719
3
+ size 17082987