PeterPinetree committed
Commit 6041788 · 1 Parent(s): 357b19c

Update index.html


Switch back to Qwen3-0.6B ONNX with config shim

- Updated the model registry to use onnx-community/Qwen3-0.6B-ONNX
- Added QWEN3_CONFIG_FIX to remap model_type "qwen3" → "qwen2" (Qwen2ForCausalLM); see the sketch after this list
- Patched loadModel to apply the shim only to the qwen entry
- Preserved the WASM single-thread backend, progress bar, and tensor-safe logits path
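
For reference, a minimal standalone sketch of the shim. The repo id, dtype, and config override come straight from the diff below; the working assumption (not something Transformers.js guarantees) is that Qwen3 checkpoints run on the same causal-LM graph as Qwen2, so presenting the config as Qwen2 lets this build construct the model:

    // Minimal sketch, assuming Transformers.js v3 from the same CDN as index.html.
    import { AutoModelForCausalLM } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0";

    // Presumably this build's dispatch tables predate model_type "qwen3", so the
    // override rewrites the two config fields that drive architecture selection.
    const QWEN3_CONFIG_FIX = {
      model_type: "qwen2",
      architectures: ["Qwen2ForCausalLM"]
    };

    const model = await AutoModelForCausalLM.from_pretrained(
      "onnx-community/Qwen3-0.6B-ONNX",
      { dtype: "int8", config: QWEN3_CONFIG_FIX }   // same call shape as the patched loadModel
    );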

Files changed (1): index.html (+56 -60)
index.html CHANGED
@@ -41,14 +41,13 @@
     .help { border-bottom:1px dotted #9ab0d0; cursor:help; }
   </style>

-  <!-- Transformers.js for browsers (CDN) -->
+  <!-- Transformers.js (browser) -->
   <script type="module">
     import {
       env,
       AutoTokenizer,
       AutoModelForCausalLM
     } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0";
-    // Expose for our script below
     window.HF = { env, AutoTokenizer, AutoModelForCausalLM };
   </script>
 </head>
@@ -69,8 +68,8 @@
   <div class="row" style="justify-content:space-between; margin-bottom:8px;">
     <div class="inline">
       <span class="muted small">Model:</span>
-      <select id="model" class="select" title="Choose which model to evaluate the next token with.">
-        <option value="qwen" selected>Qwen2.5-0.5B (Hub, int8 — faster)</option>
+      <select id="model" class="select">
+        <option value="qwen" selected>Qwen3-0.6B (Hub, int8)</option>
         <option value="distilgpt2">distilgpt2 (local → Hub fallback)</option>
       </select>
     </div>
@@ -124,18 +123,17 @@

   <script type="module">
     const { env, AutoTokenizer, AutoModelForCausalLM } = window.HF;
-
-    /* ---------- ONNX Runtime Web backend selection ---------- */
-    // Force a conservative, widely-compatible setup.
-    env.backends.onnx.webgpu = { enabled: false }; // disable WebGPU
-    env.backends.onnx.preferredBackend = "wasm";   // force WASM
-    env.backends.onnx.wasm.numThreads = 1;         // single-thread
-    env.backends.onnx.wasm.proxy = false;          // main thread
+
+    /* ---------- ONNX Runtime Web backend selection (compat mode) ---------- */
+    env.backends.onnx.webgpu = { enabled: false }; // disable WebGPU
+    env.backends.onnx.preferredBackend = "wasm";
+    env.backends.onnx.wasm.numThreads = 1;         // single-thread (no COOP/COEP)
+    env.backends.onnx.wasm.proxy = false;          // main thread
     if (typeof env.backends.onnx.wasm.jsep !== "undefined") {
-      env.backends.onnx.wasm.jsep = false;         // disable JSEP builds
+      env.backends.onnx.wasm.jsep = false;         // avoid *threaded.jsep.wasm
     }

-    /* ---------- Helpers / DOM ---------- */
+    /* ---------- DOM helpers ---------- */
     const $ = (s) => document.querySelector(s);
     const statusEl = $('#status'), barEl = $('#bar'), errEl = $('#error');
     const textEl = $('#text'), klistEl = $('#klist'), timeEl = $('#time');
@@ -144,25 +142,22 @@
     const embCanvas = $('#embCanvas'), embCtx = embCanvas.getContext('2d');
     const embStatus = $('#embStatus');

-    // Robust absolute-URL resolver (works even if window.location is unavailable)
     function ABS(p) {
       const base = (typeof document !== "undefined" && document.baseURI)
         ? document.baseURI
         : (typeof location !== "undefined" ? location.href : "https://");
       return new URL(p, base).href;
     }
-
     function setStatus(t){ if(statusEl) statusEl.textContent = t; }
     function setErr(e){ errEl.textContent = e || ""; }
     function showToken(s){ if (s === "\n") return "⏎"; if (s.trim() === "") return `␣${s.length>1 ? "×"+s.length : ""}`; return s; }
     const PUNC_ONLY = /^[\s.,;:!?—-]+$/;

-    /* ---------- Byte-accurate progress bar ---------- */
-    const transfers = new Map(); // file -> { loaded, total }
+    /* ---------- Byte-accurate progress ---------- */
+    const transfers = new Map();
     function resetProgress(){ transfers.clear(); if (barEl) barEl.style.width = "0%"; }
     function fmtMB(b){ return (b/1024/1024).toFixed(1) + " MB"; }
     function onProgress(evt){
-      // evt: { status, file?, loaded?, total? }
       if (evt.file && evt.loaded != null) {
         const prev = transfers.get(evt.file) || { loaded: 0, total: evt.total || 0 };
         const total = evt.total != null ? evt.total : prev.total;
@@ -180,21 +175,11 @@
         setStatus(evt.status);
       }
     }
-    // simple stall watchdog (UI hint only)
-    let lastBytes = 0, lastTick = Date.now();
-    setInterval(() => {
-      const bytes = [...transfers.values()].reduce((s,v)=>s+(v.loaded||0),0);
-      if (bytes > lastBytes) { lastBytes = bytes; lastTick = Date.now(); }
-      if ((Date.now()-lastTick)/1000 > 25 && statusEl.textContent.startsWith("Downloading")) {
-        setErr("Download seems idle. Check your network, or try the smaller model in the menu.");
-      }
-    }, 5000);

     /* ---------- Model registry ---------- */
     const MODELS = {
       qwen: {
-        // Faster + supported out-of-the-box (no shim needed)
-        remote: "onnx-community/Qwen2.5-0.5B-Instruct",
+        remote: "onnx-community/Qwen3-0.6B-ONNX",
         dtype: "int8",
         emb: {
           coords: ABS("assets/embeddings/qwen_pca_top5k_coords.json"),
@@ -212,6 +197,12 @@
       }
     };

+    /* ---------- Qwen3 config shim (treat as Qwen2 in JS) ---------- */
+    const QWEN3_CONFIG_FIX = {
+      model_type: "qwen2",
+      architectures: ["Qwen2ForCausalLM"]
+    };
+
     /* ---------- Embedding viewer ---------- */
     const Emb = (() => {
       let coordsPath = "", nbrsPath = "";
@@ -258,10 +249,11 @@
       const cfg = MODELS[key];
       const mySeq = ++loadSeq;

-      // Embeddings for this model
+      // Embeddings
       Emb.setSources(key);
       try { await Emb.load(); } catch { embStatus.textContent = "Map failed to load"; }

+      // Tokenizer
       setErr(""); setStatus("Loading tokenizer…"); resetProgress();
       try {
         tokenizer = await AutoTokenizer.from_pretrained(cfg.remote, { progress_callback: onProgress });
@@ -272,12 +264,21 @@
       }
       if (mySeq !== loadSeq) return;

+      // Model
       setStatus("Loading model…"); resetProgress();
       try {
-        model = await AutoModelForCausalLM.from_pretrained(cfg.remote, {
-          dtype: cfg.dtype,
-          progress_callback: onProgress
-        });
+        if (key === "qwen") {
+          model = await AutoModelForCausalLM.from_pretrained(cfg.remote, {
+            dtype: cfg.dtype,
+            progress_callback: onProgress,
+            config: QWEN3_CONFIG_FIX
+          });
+        } else {
+          model = await AutoModelForCausalLM.from_pretrained(cfg.remote, {
+            dtype: cfg.dtype,
+            progress_callback: onProgress
+          });
+        }
       } catch (e) {
         console.error("Model load failed:", e);
         setErr("Model failed to load. Check your connection or try the other model.");
@@ -285,6 +286,7 @@
       }
       if (mySeq !== loadSeq) return;

+      // Warm-up
       setStatus("Warming up…");
       const enc = await tokenizer(" ", { add_special_tokens: false, return_attention_mask: true });
       await model({ input_ids: enc.input_ids, attention_mask: enc.attention_mask });
@@ -300,37 +302,28 @@
       const out = await model({ input_ids: enc.input_ids, attention_mask: enc.attention_mask });
       const dt = (performance.now() - t0) | 0;

-      // --- logits come as a Tensor (data: Float32Array, dims: [1, seqLen, vocab]) ---
+      // logits: Tensor { data: Float32Array, dims: [1, seqLen, vocabSize] }
       const logitsT = out.logits;
-      const dims = logitsT.dims; // e.g., [1, seqLen, vocabSize]
-      const data = logitsT.data; // Float32Array
-
+      const dims = logitsT.dims;
+      const data = logitsT.data;
       const vocabSize = dims[dims.length - 1];
       const seqLen = dims[dims.length - 2];
-
-      // Take the last time step (length = vocabSize) from the flat buffer
       const start = (seqLen - 1) * vocabSize;
-      const last = data.subarray(start, start + vocabSize); // typed view (no copy)
-
-      // Softmax for probabilities
-      let m = -Infinity;
-      for (let i = 0; i < last.length; i++) if (last[i] > m) m = last[i];
-
-      const exps = new Float32Array(last.length);
-      let Z = 0;
+      const last = data.subarray(start, start + vocabSize);
+
+      // softmax
+      let m = -Infinity; for (let i = 0; i < last.length; i++) if (last[i] > m) m = last[i];
+      const exps = new Float32Array(last.length); let Z = 0;
       for (let i = 0; i < last.length; i++) { const e = Math.exp(last[i] - m); exps[i] = e; Z += e; }
-
-      // Top-K
-      const K = Math.min(parseInt(topkSel.value, 10) || 10, last.length);
+
+      const K = Math.min(parseInt(topkSel.value, 10) || 10, last.length);
       const idx = Array.from({ length: last.length }, (_, i) => [exps[i] / Z, i])
-        .sort((a, b) => b[0] - a[0])
-        .slice(0, K);
-
-      // Build rows
+        .sort((a, b) => b[0] - a[0]).slice(0, K);
+
       const rows = [];
-      for (const [p, i] of idx) {
-        const tok = await tokenizer.decode([i], { skip_special_tokens: false });
-        rows.push({ token: tok, p, id: i });
+      for (const [p, i] of idx){
+        const tok = await tokenizer.decode([i], { skip_special_tokens:false });
+        rows.push({ token: tok, p, id:i });
       }
       return { rows, dt };
     }
@@ -374,10 +367,13 @@

     /* ---------- Boot ---------- */
     (async function init(){
-      await loadModel(modelSel.value); // defaults to 'qwen' (remote-only, int8)
+      // optional: show word-like tokens first in the demo
+      // hidePunc.checked = true;
+
+      await loadModel(modelSel.value); // defaults to 'qwen' (Qwen3-0.6B)
       if (!textEl.value) textEl.value = "Twinkle, twinkle, little ";
       await predict();
     })();
   </script>
 </body>
-</html>
+</html>
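
The "tensor-safe logits" path preserved above avoids copying the logits buffer: it takes a subarray view of the last time step from the flat Tensor data, then applies a max-shifted softmax and sorts for top-k. A condensed, self-contained version of that computation (the function name and signature are illustrative, not part of the commit):

    // Condensed sketch of the last-step softmax/top-k from the diff above.
    // data: Tensor.data (Float32Array of length seqLen * vocabSize); dims: [1, seqLen, vocabSize].
    function lastStepTopK(data, dims, k) {
      const vocabSize = dims[dims.length - 1];
      const seqLen = dims[dims.length - 2];
      const last = data.subarray((seqLen - 1) * vocabSize, seqLen * vocabSize); // typed view, no copy

      let m = -Infinity;                            // max logit, for numerical stability
      for (let i = 0; i < last.length; i++) if (last[i] > m) m = last[i];

      const exps = new Float32Array(last.length);
      let Z = 0;                                    // softmax normalizer
      for (let i = 0; i < last.length; i++) { exps[i] = Math.exp(last[i] - m); Z += exps[i]; }

      return Array.from(exps, (e, i) => [e / Z, i]) // [probability, tokenId] pairs
        .sort((a, b) => b[0] - a[0])
        .slice(0, k);
    }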