PeterPinetree committed
Commit 6041788 · 1 Parent(s): 357b19c

Update index.html


Switch back to Qwen3-0.6B ONNX with config shim

- Updated the model registry to use onnx-community/Qwen3-0.6B-ONNX
- Added QWEN3_CONFIG_FIX to remap model_type "qwen3" → "qwen2" (Qwen2ForCausalLM); see the sketch after this list
- Patched loadModel to apply the shim only to the qwen entry
- Preserved the WASM single-thread backend, progress bar, and tensor-safe logits path
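
For reference, a minimal standalone sketch of the shim. The repo id, dtype, and config override come straight from the diff below; the working assumption (not something Transformers.js guarantees) is that Qwen3 checkpoints run on the same causal-LM graph as Qwen2, so presenting the config as Qwen2 lets this build construct the model:

    // Minimal sketch, assuming Transformers.js v3 from the same CDN as index.html.
    import { AutoModelForCausalLM } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0";

    // Presumably this build's dispatch tables predate model_type "qwen3", so the
    // override rewrites the two config fields that drive architecture selection.
    const QWEN3_CONFIG_FIX = {
      model_type: "qwen2",
      architectures: ["Qwen2ForCausalLM"]
    };

    const model = await AutoModelForCausalLM.from_pretrained(
      "onnx-community/Qwen3-0.6B-ONNX",
      { dtype: "int8", config: QWEN3_CONFIG_FIX }   // same call shape as the patched loadModel
    );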

Files changed (1): index.html (+56 -60)
index.html CHANGED
@@ -41,14 +41,13 @@
     .help { border-bottom:1px dotted #9ab0d0; cursor:help; }
   </style>

-  <!-- Transformers.js for browsers (CDN) -->
+  <!-- Transformers.js (browser) -->
   <script type="module">
     import {
       env,
       AutoTokenizer,
       AutoModelForCausalLM
     } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0";
-    // Expose for our script below
     window.HF = { env, AutoTokenizer, AutoModelForCausalLM };
   </script>
 </head>
@@ -69,8 +68,8 @@
   <div class="row" style="justify-content:space-between; margin-bottom:8px;">
     <div class="inline">
       <span class="muted small">Model:</span>
-      <select id="model" class="select" title="Choose which model to evaluate the next token with.">
-        <option value="qwen" selected>Qwen2.5-0.5B (Hub, int8 — faster)</option>
+      <select id="model" class="select">
+        <option value="qwen" selected>Qwen3-0.6B (Hub, int8)</option>
         <option value="distilgpt2">distilgpt2 (local → Hub fallback)</option>
       </select>
     </div>
@@ -124,18 +123,17 @@

   <script type="module">
     const { env, AutoTokenizer, AutoModelForCausalLM } = window.HF;
-
-    /* ---------- ONNX Runtime Web backend selection ---------- */
-    // Force a conservative, widely-compatible setup.
-    env.backends.onnx.webgpu = { enabled: false }; // disable WebGPU
-    env.backends.onnx.preferredBackend = "wasm";   // force WASM
-    env.backends.onnx.wasm.numThreads = 1;         // single-thread
-    env.backends.onnx.wasm.proxy = false;          // main thread
+
+    /* ---------- ONNX Runtime Web backend selection (compat mode) ---------- */
+    env.backends.onnx.webgpu = { enabled: false }; // disable WebGPU
+    env.backends.onnx.preferredBackend = "wasm";
+    env.backends.onnx.wasm.numThreads = 1;         // single-thread (no COOP/COEP)
+    env.backends.onnx.wasm.proxy = false;          // main thread
     if (typeof env.backends.onnx.wasm.jsep !== "undefined") {
-      env.backends.onnx.wasm.jsep = false;         // disable JSEP builds
+      env.backends.onnx.wasm.jsep = false;         // avoid *threaded.jsep.wasm
     }

-    /* ---------- Helpers / DOM ---------- */
+    /* ---------- DOM helpers ---------- */
     const $ = (s) => document.querySelector(s);
     const statusEl = $('#status'), barEl = $('#bar'), errEl = $('#error');
     const textEl = $('#text'), klistEl = $('#klist'), timeEl = $('#time');
@@ -144,25 +142,22 @@
     const embCanvas = $('#embCanvas'), embCtx = embCanvas.getContext('2d');
     const embStatus = $('#embStatus');

-    // Robust absolute-URL resolver (works even if window.location is unavailable)
     function ABS(p) {
       const base = (typeof document !== "undefined" && document.baseURI)
         ? document.baseURI
         : (typeof location !== "undefined" ? location.href : "https://");
       return new URL(p, base).href;
     }
-
     function setStatus(t){ if(statusEl) statusEl.textContent = t; }
     function setErr(e){ errEl.textContent = e || ""; }
     function showToken(s){ if (s === "\n") return "⏎"; if (s.trim() === "") return `␣${s.length>1 ? "×"+s.length : ""}`; return s; }
     const PUNC_ONLY = /^[\s.,;:!?—-]+$/;

-    /* ---------- Byte-accurate progress bar ---------- */
-    const transfers = new Map(); // file -> { loaded, total }
+    /* ---------- Byte-accurate progress ---------- */
+    const transfers = new Map();
     function resetProgress(){ transfers.clear(); if (barEl) barEl.style.width = "0%"; }
     function fmtMB(b){ return (b/1024/1024).toFixed(1) + " MB"; }
     function onProgress(evt){
-      // evt: { status, file?, loaded?, total? }
       if (evt.file && evt.loaded != null) {
         const prev = transfers.get(evt.file) || { loaded: 0, total: evt.total || 0 };
         const total = evt.total != null ? evt.total : prev.total;
@@ -180,21 +175,11 @@
         setStatus(evt.status);
       }
     }
-    // simple stall watchdog (UI hint only)
-    let lastBytes = 0, lastTick = Date.now();
-    setInterval(() => {
-      const bytes = [...transfers.values()].reduce((s,v)=>s+(v.loaded||0),0);
-      if (bytes > lastBytes) { lastBytes = bytes; lastTick = Date.now(); }
-      if ((Date.now()-lastTick)/1000 > 25 && statusEl.textContent.startsWith("Downloading")) {
-        setErr("Download seems idle. Check your network, or try the smaller model in the menu.");
-      }
-    }, 5000);

     /* ---------- Model registry ---------- */
     const MODELS = {
       qwen: {
-        // Faster + supported out-of-the-box (no shim needed)
-        remote: "onnx-community/Qwen2.5-0.5B-Instruct",
+        remote: "onnx-community/Qwen3-0.6B-ONNX",
         dtype: "int8",
         emb: {
           coords: ABS("assets/embeddings/qwen_pca_top5k_coords.json"),
@@ -212,6 +197,12 @@
       }
     };

+    /* ---------- Qwen3 config shim (treat as Qwen2 in JS) ---------- */
+    const QWEN3_CONFIG_FIX = {
+      model_type: "qwen2",
+      architectures: ["Qwen2ForCausalLM"]
+    };
+
     /* ---------- Embedding viewer ---------- */
     const Emb = (() => {
       let coordsPath = "", nbrsPath = "";
@@ -258,10 +249,11 @@
       const cfg = MODELS[key];
       const mySeq = ++loadSeq;

-      // Embeddings for this model
+      // Embeddings
       Emb.setSources(key);
       try { await Emb.load(); } catch { embStatus.textContent = "Map failed to load"; }

+      // Tokenizer
       setErr(""); setStatus("Loading tokenizer…"); resetProgress();
       try {
         tokenizer = await AutoTokenizer.from_pretrained(cfg.remote, { progress_callback: onProgress });
@@ -272,12 +264,21 @@
       }
       if (mySeq !== loadSeq) return;

+      // Model
       setStatus("Loading model…"); resetProgress();
       try {
-        model = await AutoModelForCausalLM.from_pretrained(cfg.remote, {
-          dtype: cfg.dtype,
-          progress_callback: onProgress
-        });
+        if (key === "qwen") {
+          model = await AutoModelForCausalLM.from_pretrained(cfg.remote, {
+            dtype: cfg.dtype,
+            progress_callback: onProgress,
+            config: QWEN3_CONFIG_FIX
+          });
+        } else {
+          model = await AutoModelForCausalLM.from_pretrained(cfg.remote, {
+            dtype: cfg.dtype,
+            progress_callback: onProgress
+          });
+        }
       } catch (e) {
         console.error("Model load failed:", e);
         setErr("Model failed to load. Check your connection or try the other model.");
@@ -285,6 +286,7 @@
       }
       if (mySeq !== loadSeq) return;

+      // Warm-up
       setStatus("Warming up…");
       const enc = await tokenizer(" ", { add_special_tokens: false, return_attention_mask: true });
       await model({ input_ids: enc.input_ids, attention_mask: enc.attention_mask });
@@ -300,37 +302,28 @@
       const out = await model({ input_ids: enc.input_ids, attention_mask: enc.attention_mask });
       const dt = (performance.now() - t0) | 0;

-      // --- logits come as a Tensor (data: Float32Array, dims: [1, seqLen, vocab]) ---
+      // logits: Tensor { data: Float32Array, dims: [1, seqLen, vocabSize] }
       const logitsT = out.logits;
-      const dims = logitsT.dims; // e.g., [1, seqLen, vocabSize]
-      const data = logitsT.data; // Float32Array
-
+      const dims = logitsT.dims;
+      const data = logitsT.data;
       const vocabSize = dims[dims.length - 1];
       const seqLen = dims[dims.length - 2];
-
-      // Take the last time step (length = vocabSize) from the flat buffer
       const start = (seqLen - 1) * vocabSize;
-      const last = data.subarray(start, start + vocabSize); // typed view (no copy)
-
-      // Softmax for probabilities
-      let m = -Infinity;
-      for (let i = 0; i < last.length; i++) if (last[i] > m) m = last[i];
-
-      const exps = new Float32Array(last.length);
-      let Z = 0;
+      const last = data.subarray(start, start + vocabSize);
+
+      // softmax
+      let m = -Infinity; for (let i = 0; i < last.length; i++) if (last[i] > m) m = last[i];
+      const exps = new Float32Array(last.length); let Z = 0;
       for (let i = 0; i < last.length; i++) { const e = Math.exp(last[i] - m); exps[i] = e; Z += e; }
-
-      // Top-K
-      const K = Math.min(parseInt(topkSel.value, 10) || 10, last.length);
+
+      const K = Math.min(parseInt(topkSel.value, 10) || 10, last.length);
       const idx = Array.from({ length: last.length }, (_, i) => [exps[i] / Z, i])
-        .sort((a, b) => b[0] - a[0])
-        .slice(0, K);
-
-      // Build rows
+        .sort((a, b) => b[0] - a[0]).slice(0, K);
+
       const rows = [];
-      for (const [p, i] of idx) {
-        const tok = await tokenizer.decode([i], { skip_special_tokens: false });
-        rows.push({ token: tok, p, id: i });
+      for (const [p, i] of idx){
+        const tok = await tokenizer.decode([i], { skip_special_tokens:false });
+        rows.push({ token: tok, p, id:i });
       }
       return { rows, dt };
     }
@@ -374,10 +367,13 @@

     /* ---------- Boot ---------- */
     (async function init(){
-      await loadModel(modelSel.value); // defaults to 'qwen' (remote-only, int8)
+      // optional: show word-like tokens first in the demo
+      // hidePunc.checked = true;
+
+      await loadModel(modelSel.value); // defaults to 'qwen' (Qwen3-0.6B)
       if (!textEl.value) textEl.value = "Twinkle, twinkle, little ";
       await predict();
     })();
   </script>
 </body>
-</html>
+</html>
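
The "tensor-safe logits" path preserved above avoids copying the logits buffer: it takes a subarray view of the last time step from the flat Tensor data, then applies a max-shifted softmax and sorts for top-k. A condensed, self-contained version of that computation (the function name and signature are illustrative, not part of the commit):

    // Condensed sketch of the last-step softmax/top-k from the diff above.
    // data: Tensor.data (Float32Array of length seqLen * vocabSize); dims: [1, seqLen, vocabSize].
    function lastStepTopK(data, dims, k) {
      const vocabSize = dims[dims.length - 1];
      const seqLen = dims[dims.length - 2];
      const last = data.subarray((seqLen - 1) * vocabSize, seqLen * vocabSize); // typed view, no copy

      let m = -Infinity;                            // max logit, for numerical stability
      for (let i = 0; i < last.length; i++) if (last[i] > m) m = last[i];

      const exps = new Float32Array(last.length);
      let Z = 0;                                    // softmax normalizer
      for (let i = 0; i < last.length; i++) { exps[i] = Math.exp(last[i] - m); Z += exps[i]; }

      return Array.from(exps, (e, i) => [e / Z, i]) // [probability, tokenId] pairs
        .sort((a, b) => b[0] - a[0])
        .slice(0, k);
    }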