Commit
·
6041788
1 Parent(s):
357b19c
Update index.html
Browse files
Switch back to Qwen3-0.6B ONNX with config shim
- Updated model registry to use onnx-community/Qwen3-0.6B-ONNX
- Added QWEN3_CONFIG_FIX to map model_type "qwen3" → Qwen2ForCausalLM
- Patched loadModel to apply shim only for qwen
- Preserved WASM single-thread backend, progress bar, and tensor-safe logits
- index.html +56 -60
index.html
CHANGED
|
@@ -41,14 +41,13 @@
|
|
| 41 |
.help { border-bottom:1px dotted #9ab0d0; cursor:help; }
|
| 42 |
</style>
|
| 43 |
|
| 44 |
-
<!-- Transformers.js
|
| 45 |
<script type="module">
|
| 46 |
import {
|
| 47 |
env,
|
| 48 |
AutoTokenizer,
|
| 49 |
AutoModelForCausalLM
|
| 50 |
} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0";
|
| 51 |
-
// Expose for our script below
|
| 52 |
window.HF = { env, AutoTokenizer, AutoModelForCausalLM };
|
| 53 |
</script>
|
| 54 |
</head>
|
|
@@ -69,8 +68,8 @@
|
|
| 69 |
<div class="row" style="justify-content:space-between; margin-bottom:8px;">
|
| 70 |
<div class="inline">
|
| 71 |
<span class="muted small">Model:</span>
|
| 72 |
-
<select id="model" class="select"
|
| 73 |
-
<option value="qwen" selected>
|
| 74 |
<option value="distilgpt2">distilgpt2 (local → Hub fallback)</option>
|
| 75 |
</select>
|
| 76 |
</div>
|
|
@@ -124,18 +123,17 @@
|
|
| 124 |
|
| 125 |
<script type="module">
|
| 126 |
const { env, AutoTokenizer, AutoModelForCausalLM } = window.HF;
|
| 127 |
-
|
| 128 |
-
/* ---------- ONNX Runtime Web backend selection ---------- */
|
| 129 |
-
|
| 130 |
-
env.backends.onnx.
|
| 131 |
-
env.backends.onnx.
|
| 132 |
-
env.backends.onnx.wasm.
|
| 133 |
-
env.backends.onnx.wasm.proxy = false; // main thread
|
| 134 |
if (typeof env.backends.onnx.wasm.jsep !== "undefined") {
|
| 135 |
-
env.backends.onnx.wasm.jsep = false;
|
| 136 |
}
|
| 137 |
|
| 138 |
-
/* ----------
|
| 139 |
const $ = (s) => document.querySelector(s);
|
| 140 |
const statusEl = $('#status'), barEl = $('#bar'), errEl = $('#error');
|
| 141 |
const textEl = $('#text'), klistEl = $('#klist'), timeEl = $('#time');
|
|
@@ -144,25 +142,22 @@
|
|
| 144 |
const embCanvas = $('#embCanvas'), embCtx = embCanvas.getContext('2d');
|
| 145 |
const embStatus = $('#embStatus');
|
| 146 |
|
| 147 |
-
// Robust absolute-URL resolver (works even if window.location is unavailable)
|
| 148 |
function ABS(p) {
|
| 149 |
const base = (typeof document !== "undefined" && document.baseURI)
|
| 150 |
? document.baseURI
|
| 151 |
: (typeof location !== "undefined" ? location.href : "https://");
|
| 152 |
return new URL(p, base).href;
|
| 153 |
}
|
| 154 |
-
|
| 155 |
function setStatus(t){ if(statusEl) statusEl.textContent = t; }
|
| 156 |
function setErr(e){ errEl.textContent = e || ""; }
|
| 157 |
function showToken(s){ if (s === "\n") return "⏎"; if (s.trim() === "") return `␣${s.length>1 ? "×"+s.length : ""}`; return s; }
|
| 158 |
const PUNC_ONLY = /^[\s.,;:!?—-]+$/;
|
| 159 |
|
| 160 |
-
/* ---------- Byte-accurate progress
|
| 161 |
-
const transfers = new Map();
|
| 162 |
function resetProgress(){ transfers.clear(); if (barEl) barEl.style.width = "0%"; }
|
| 163 |
function fmtMB(b){ return (b/1024/1024).toFixed(1) + " MB"; }
|
| 164 |
function onProgress(evt){
|
| 165 |
-
// evt: { status, file?, loaded?, total? }
|
| 166 |
if (evt.file && evt.loaded != null) {
|
| 167 |
const prev = transfers.get(evt.file) || { loaded: 0, total: evt.total || 0 };
|
| 168 |
const total = evt.total != null ? evt.total : prev.total;
|
|
@@ -180,21 +175,11 @@
|
|
| 180 |
setStatus(evt.status);
|
| 181 |
}
|
| 182 |
}
|
| 183 |
-
// simple stall watchdog (UI hint only)
|
| 184 |
-
let lastBytes = 0, lastTick = Date.now();
|
| 185 |
-
setInterval(() => {
|
| 186 |
-
const bytes = [...transfers.values()].reduce((s,v)=>s+(v.loaded||0),0);
|
| 187 |
-
if (bytes > lastBytes) { lastBytes = bytes; lastTick = Date.now(); }
|
| 188 |
-
if ((Date.now()-lastTick)/1000 > 25 && statusEl.textContent.startsWith("Downloading")) {
|
| 189 |
-
setErr("Download seems idle. Check your network, or try the smaller model in the menu.");
|
| 190 |
-
}
|
| 191 |
-
}, 5000);
|
| 192 |
|
| 193 |
/* ---------- Model registry ---------- */
|
| 194 |
const MODELS = {
|
| 195 |
qwen: {
|
| 196 |
-
|
| 197 |
-
remote: "onnx-community/Qwen2.5-0.5B-Instruct",
|
| 198 |
dtype: "int8",
|
| 199 |
emb: {
|
| 200 |
coords: ABS("assets/embeddings/qwen_pca_top5k_coords.json"),
|
|
@@ -212,6 +197,12 @@
|
|
| 212 |
}
|
| 213 |
};
|
| 214 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
/* ---------- Embedding viewer ---------- */
|
| 216 |
const Emb = (() => {
|
| 217 |
let coordsPath = "", nbrsPath = "";
|
|
@@ -258,10 +249,11 @@
|
|
| 258 |
const cfg = MODELS[key];
|
| 259 |
const mySeq = ++loadSeq;
|
| 260 |
|
| 261 |
-
// Embeddings
|
| 262 |
Emb.setSources(key);
|
| 263 |
try { await Emb.load(); } catch { embStatus.textContent = "Map failed to load"; }
|
| 264 |
|
|
|
|
| 265 |
setErr(""); setStatus("Loading tokenizer…"); resetProgress();
|
| 266 |
try {
|
| 267 |
tokenizer = await AutoTokenizer.from_pretrained(cfg.remote, { progress_callback: onProgress });
|
|
@@ -272,12 +264,21 @@
|
|
| 272 |
}
|
| 273 |
if (mySeq !== loadSeq) return;
|
| 274 |
|
|
|
|
| 275 |
setStatus("Loading model…"); resetProgress();
|
| 276 |
try {
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
} catch (e) {
|
| 282 |
console.error("Model load failed:", e);
|
| 283 |
setErr("Model failed to load. Check your connection or try the other model.");
|
|
@@ -285,6 +286,7 @@
|
|
| 285 |
}
|
| 286 |
if (mySeq !== loadSeq) return;
|
| 287 |
|
|
|
|
| 288 |
setStatus("Warming up…");
|
| 289 |
const enc = await tokenizer(" ", { add_special_tokens: false, return_attention_mask: true });
|
| 290 |
await model({ input_ids: enc.input_ids, attention_mask: enc.attention_mask });
|
|
@@ -300,37 +302,28 @@
|
|
| 300 |
const out = await model({ input_ids: enc.input_ids, attention_mask: enc.attention_mask });
|
| 301 |
const dt = (performance.now() - t0) | 0;
|
| 302 |
|
| 303 |
-
//
|
| 304 |
const logitsT = out.logits;
|
| 305 |
-
const dims = logitsT.dims;
|
| 306 |
-
const data = logitsT.data;
|
| 307 |
-
|
| 308 |
const vocabSize = dims[dims.length - 1];
|
| 309 |
const seqLen = dims[dims.length - 2];
|
| 310 |
-
|
| 311 |
-
// Take the last time step (length = vocabSize) from the flat buffer
|
| 312 |
const start = (seqLen - 1) * vocabSize;
|
| 313 |
-
const last = data.subarray(start, start + vocabSize);
|
| 314 |
-
|
| 315 |
-
//
|
| 316 |
-
let m = -Infinity;
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
const exps = new Float32Array(last.length);
|
| 320 |
-
let Z = 0;
|
| 321 |
for (let i = 0; i < last.length; i++) { const e = Math.exp(last[i] - m); exps[i] = e; Z += e; }
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
const K = Math.min(parseInt(topkSel.value, 10) || 10, last.length);
|
| 325 |
const idx = Array.from({ length: last.length }, (_, i) => [exps[i] / Z, i])
|
| 326 |
-
.sort((a, b) => b[0] - a[0])
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
// Build rows
|
| 330 |
const rows = [];
|
| 331 |
-
for (const [p, i] of idx)
|
| 332 |
-
const tok = await tokenizer.decode([i], { skip_special_tokens:
|
| 333 |
-
rows.push({ token: tok, p, id:
|
| 334 |
}
|
| 335 |
return { rows, dt };
|
| 336 |
}
|
|
@@ -374,10 +367,13 @@
|
|
| 374 |
|
| 375 |
/* ---------- Boot ---------- */
|
| 376 |
(async function init(){
|
| 377 |
-
|
|
|
|
|
|
|
|
|
|
| 378 |
if (!textEl.value) textEl.value = "Twinkle, twinkle, little ";
|
| 379 |
await predict();
|
| 380 |
})();
|
| 381 |
</script>
|
| 382 |
</body>
|
| 383 |
-
</html>
|
|
|
|
| 41 |
.help { border-bottom:1px dotted #9ab0d0; cursor:help; }
|
| 42 |
</style>
|
| 43 |
|
| 44 |
+
<!-- Transformers.js (browser) -->
|
| 45 |
<script type="module">
|
| 46 |
import {
|
| 47 |
env,
|
| 48 |
AutoTokenizer,
|
| 49 |
AutoModelForCausalLM
|
| 50 |
} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0";
|
|
|
|
| 51 |
window.HF = { env, AutoTokenizer, AutoModelForCausalLM };
|
| 52 |
</script>
|
| 53 |
</head>
|
|
|
|
| 68 |
<div class="row" style="justify-content:space-between; margin-bottom:8px;">
|
| 69 |
<div class="inline">
|
| 70 |
<span class="muted small">Model:</span>
|
| 71 |
+
<select id="model" class="select">
|
| 72 |
+
<option value="qwen" selected>Qwen3-0.6B (Hub, int8)</option>
|
| 73 |
<option value="distilgpt2">distilgpt2 (local → Hub fallback)</option>
|
| 74 |
</select>
|
| 75 |
</div>
|
|
|
|
| 123 |
|
| 124 |
<script type="module">
|
| 125 |
const { env, AutoTokenizer, AutoModelForCausalLM } = window.HF;
|
| 126 |
+
|
| 127 |
+
/* ---------- ONNX Runtime Web backend selection (compat mode) ---------- */
|
| 128 |
+
env.backends.onnx.webgpu = { enabled: false }; // disable WebGPU
|
| 129 |
+
env.backends.onnx.preferredBackend = "wasm";
|
| 130 |
+
env.backends.onnx.wasm.numThreads = 1; // single-thread (no COOP/COEP)
|
| 131 |
+
env.backends.onnx.wasm.proxy = false; // main thread
|
|
|
|
| 132 |
if (typeof env.backends.onnx.wasm.jsep !== "undefined") {
|
| 133 |
+
env.backends.onnx.wasm.jsep = false; // avoid *threaded.jsep.wasm
|
| 134 |
}
|
| 135 |
|
| 136 |
+
/* ---------- DOM helpers ---------- */
|
| 137 |
const $ = (s) => document.querySelector(s);
|
| 138 |
const statusEl = $('#status'), barEl = $('#bar'), errEl = $('#error');
|
| 139 |
const textEl = $('#text'), klistEl = $('#klist'), timeEl = $('#time');
|
|
|
|
| 142 |
const embCanvas = $('#embCanvas'), embCtx = embCanvas.getContext('2d');
|
| 143 |
const embStatus = $('#embStatus');
|
| 144 |
|
|
|
|
| 145 |
function ABS(p) {
|
| 146 |
const base = (typeof document !== "undefined" && document.baseURI)
|
| 147 |
? document.baseURI
|
| 148 |
: (typeof location !== "undefined" ? location.href : "https://");
|
| 149 |
return new URL(p, base).href;
|
| 150 |
}
|
|
|
|
| 151 |
function setStatus(t){ if(statusEl) statusEl.textContent = t; }
|
| 152 |
function setErr(e){ errEl.textContent = e || ""; }
|
| 153 |
function showToken(s){ if (s === "\n") return "⏎"; if (s.trim() === "") return `␣${s.length>1 ? "×"+s.length : ""}`; return s; }
|
| 154 |
const PUNC_ONLY = /^[\s.,;:!?—-]+$/;
|
| 155 |
|
| 156 |
+
/* ---------- Byte-accurate progress ---------- */
|
| 157 |
+
const transfers = new Map();
|
| 158 |
function resetProgress(){ transfers.clear(); if (barEl) barEl.style.width = "0%"; }
|
| 159 |
function fmtMB(b){ return (b/1024/1024).toFixed(1) + " MB"; }
|
| 160 |
function onProgress(evt){
|
|
|
|
| 161 |
if (evt.file && evt.loaded != null) {
|
| 162 |
const prev = transfers.get(evt.file) || { loaded: 0, total: evt.total || 0 };
|
| 163 |
const total = evt.total != null ? evt.total : prev.total;
|
|
|
|
| 175 |
setStatus(evt.status);
|
| 176 |
}
|
| 177 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
|
| 179 |
/* ---------- Model registry ---------- */
|
| 180 |
const MODELS = {
|
| 181 |
qwen: {
|
| 182 |
+
remote: "onnx-community/Qwen3-0.6B-ONNX",
|
|
|
|
| 183 |
dtype: "int8",
|
| 184 |
emb: {
|
| 185 |
coords: ABS("assets/embeddings/qwen_pca_top5k_coords.json"),
|
|
|
|
| 197 |
}
|
| 198 |
};
|
| 199 |
|
| 200 |
+
/* ---------- Qwen3 config shim (treat as Qwen2 in JS) ---------- */
|
| 201 |
+
const QWEN3_CONFIG_FIX = {
|
| 202 |
+
model_type: "qwen2",
|
| 203 |
+
architectures: ["Qwen2ForCausalLM"]
|
| 204 |
+
};
|
| 205 |
+
|
| 206 |
/* ---------- Embedding viewer ---------- */
|
| 207 |
const Emb = (() => {
|
| 208 |
let coordsPath = "", nbrsPath = "";
|
|
|
|
| 249 |
const cfg = MODELS[key];
|
| 250 |
const mySeq = ++loadSeq;
|
| 251 |
|
| 252 |
+
// Embeddings
|
| 253 |
Emb.setSources(key);
|
| 254 |
try { await Emb.load(); } catch { embStatus.textContent = "Map failed to load"; }
|
| 255 |
|
| 256 |
+
// Tokenizer
|
| 257 |
setErr(""); setStatus("Loading tokenizer…"); resetProgress();
|
| 258 |
try {
|
| 259 |
tokenizer = await AutoTokenizer.from_pretrained(cfg.remote, { progress_callback: onProgress });
|
|
|
|
| 264 |
}
|
| 265 |
if (mySeq !== loadSeq) return;
|
| 266 |
|
| 267 |
+
// Model
|
| 268 |
setStatus("Loading model…"); resetProgress();
|
| 269 |
try {
|
| 270 |
+
if (key === "qwen") {
|
| 271 |
+
model = await AutoModelForCausalLM.from_pretrained(cfg.remote, {
|
| 272 |
+
dtype: cfg.dtype,
|
| 273 |
+
progress_callback: onProgress,
|
| 274 |
+
config: QWEN3_CONFIG_FIX
|
| 275 |
+
});
|
| 276 |
+
} else {
|
| 277 |
+
model = await AutoModelForCausalLM.from_pretrained(cfg.remote, {
|
| 278 |
+
dtype: cfg.dtype,
|
| 279 |
+
progress_callback: onProgress
|
| 280 |
+
});
|
| 281 |
+
}
|
| 282 |
} catch (e) {
|
| 283 |
console.error("Model load failed:", e);
|
| 284 |
setErr("Model failed to load. Check your connection or try the other model.");
|
|
|
|
| 286 |
}
|
| 287 |
if (mySeq !== loadSeq) return;
|
| 288 |
|
| 289 |
+
// Warm-up
|
| 290 |
setStatus("Warming up…");
|
| 291 |
const enc = await tokenizer(" ", { add_special_tokens: false, return_attention_mask: true });
|
| 292 |
await model({ input_ids: enc.input_ids, attention_mask: enc.attention_mask });
|
|
|
|
| 302 |
const out = await model({ input_ids: enc.input_ids, attention_mask: enc.attention_mask });
|
| 303 |
const dt = (performance.now() - t0) | 0;
|
| 304 |
|
| 305 |
+
// logits: Tensor { data: Float32Array, dims: [1, seqLen, vocabSize] }
|
| 306 |
const logitsT = out.logits;
|
| 307 |
+
const dims = logitsT.dims;
|
| 308 |
+
const data = logitsT.data;
|
|
|
|
| 309 |
const vocabSize = dims[dims.length - 1];
|
| 310 |
const seqLen = dims[dims.length - 2];
|
|
|
|
|
|
|
| 311 |
const start = (seqLen - 1) * vocabSize;
|
| 312 |
+
const last = data.subarray(start, start + vocabSize);
|
| 313 |
+
|
| 314 |
+
// softmax
|
| 315 |
+
let m = -Infinity; for (let i = 0; i < last.length; i++) if (last[i] > m) m = last[i];
|
| 316 |
+
const exps = new Float32Array(last.length); let Z = 0;
|
|
|
|
|
|
|
|
|
|
| 317 |
for (let i = 0; i < last.length; i++) { const e = Math.exp(last[i] - m); exps[i] = e; Z += e; }
|
| 318 |
+
|
| 319 |
+
const K = Math.min(parseInt(topkSel.value, 10) || 10, last.length);
|
|
|
|
| 320 |
const idx = Array.from({ length: last.length }, (_, i) => [exps[i] / Z, i])
|
| 321 |
+
.sort((a, b) => b[0] - a[0]).slice(0, K);
|
| 322 |
+
|
|
|
|
|
|
|
| 323 |
const rows = [];
|
| 324 |
+
for (const [p, i] of idx){
|
| 325 |
+
const tok = await tokenizer.decode([i], { skip_special_tokens:false });
|
| 326 |
+
rows.push({ token: tok, p, id:i });
|
| 327 |
}
|
| 328 |
return { rows, dt };
|
| 329 |
}
|
|
|
|
| 367 |
|
| 368 |
/* ---------- Boot ---------- */
|
| 369 |
(async function init(){
|
| 370 |
+
// optional: show word-like tokens first in the demo
|
| 371 |
+
// hidePunc.checked = true;
|
| 372 |
+
|
| 373 |
+
await loadModel(modelSel.value); // defaults to 'qwen' (Qwen3-0.6B)
|
| 374 |
if (!textEl.value) textEl.value = "Twinkle, twinkle, little ";
|
| 375 |
await predict();
|
| 376 |
})();
|
| 377 |
</script>
|
| 378 |
</body>
|
| 379 |
+
</html>
|