PeterPinetree committed on
Commit
5512a5e
·
verified ·
1 Parent(s): 8caf072

Update index.html

Browse files

fix(tokenizer): include merges.txt + vocab.json for Qwen BPE; load as text/JSON in loadTokenizerQwenLocal to avoid e.split error

Files changed (1) hide show
  1. index.html +35 -0
index.html CHANGED
@@ -274,6 +274,41 @@
274
  return { setSources, load, drawBase, highlight };
275
  })();
276
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
  // Core model state (module scope)
278
  let tokenizer = null, model = null;
279
  let loadSeq = 0;
 
274
  return { setSources, load, drawBase, highlight };
275
  })();
276
 
277
// --- Local Qwen tokenizer loader (no Hub, no path rewrite) ---
/**
 * Fetches the Qwen tokenizer files relative to `MODELS.qwen.base` and builds
 * the module-level `tokenizer` from them via `AutoTokenizer.from_pretrained`.
 *
 * Only tokenizer.json, tokenizer_config.json and (optionally)
 * special_tokens_map.json are requested; merges.txt / vocab.json are not
 * fetched by this function.
 *
 * @returns {Promise<void>} Resolves once `tokenizer` has been assigned.
 * @throws {Error} "missing tokenizer.json" / "missing tokenizer_config.json"
 *   when the corresponding response is not OK; also rejects if a required
 *   file cannot be fetched or parsed, or if `from_pretrained` rejects.
 */
async function loadTokenizerQwenLocal() {
  resetProgress("Tokenizer");
  setStatus("Tokenizer: starting…");

  // Resolve the (possibly relative) model base against the current page so
  // every file URL below is absolute.
  const baseHref = new URL(MODELS.qwen.base, window.location.href).href;

  // Required file: any failure (HTTP error, network error, bad JSON) rejects.
  const fetchRequired = async (name) => {
    const response = await fetch(new URL(name, baseHref).href);
    if (!response.ok) throw new Error(`missing ${name}`);
    return response.json();
  };

  // Optional file: every failure mode degrades to `null`. fetch() rejects on
  // network errors and json() rejects on malformed bodies; neither should
  // abort the whole tokenizer load for a file that is allowed to be absent.
  const fetchOptional = async (name) => {
    try {
      const response = await fetch(new URL(name, baseHref).href);
      return response.ok ? await response.json() : null;
    } catch (err) {
      console.warn(`optional ${name} could not be loaded`, err);
      return null;
    }
  };

  // Re-serialise the parsed JSON so each entry is a well-formed JSON Blob.
  const toJSONBlob = (value) =>
    new Blob([JSON.stringify(value)], { type: "application/json" });

  const [tokJSON, tokCfgJSON, smapJSON] = await Promise.all([
    fetchRequired("tokenizer.json"),
    fetchRequired("tokenizer_config.json"),
    fetchOptional("special_tokens_map.json"),
  ]);

  const files = new Map();
  files.set("tokenizer.json", toJSONBlob(tokJSON));
  files.set("tokenizer_config.json", toJSONBlob(tokCfgJSON));
  if (smapJSON) {
    files.set("special_tokens_map.json", toJSONBlob(smapJSON));
  }

  // NOTE(review): `files` is not among the documented `from_pretrained`
  // options (progress_callback, config, cache_dir, local_files_only,
  // revision) — confirm the loaded Transformers.js build actually reads it,
  // otherwise the empty model id with `local_files_only` will not find these
  // in-memory blobs.
  tokenizer = await AutoTokenizer.from_pretrained("", {
    files,
    local_files_only: true,
    progress_callback: onProgress,
  });
}
307
+
308
// Core model state (module scope)
// Declared exactly once: repeating a `let` declaration for the same name in
// the same scope is an early SyntaxError that prevents the script from running.
// `tokenizer` is assigned by loadTokenizerQwenLocal().
let tokenizer = null;
let model = null;
// NOTE(review): presumably a sequence counter used to discard stale loads —
// confirm against the code that increments it.
let loadSeq = 0;