Update index.html

index.html (+44 -24)
@@ -205,6 +205,7 @@
 const MODELS = {
   qwen: {
     id: "qwen", // this is the folder name under assets/models/
+    base: new URL("assets/models/qwen/", window.location.href).href,
     onnx_file: "onnx/model_q4f16.onnx",
     emb_coords: "assets/embeddings/qwen_pca_top5k_coords.json",
     emb_nbrs: "assets/embeddings/qwen_neighbors_top5k_k40.json",
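Computing base once with the URL constructor, instead of concatenating strings, keeps every later lookup correct when the page is served from a sub-directory. A minimal sketch of how the resolution behaves (the host and project path below are made up for illustration):

// If the page lives at https://example.github.io/token-vis/index.html:
const base = new URL("assets/models/qwen/", window.location.href).href;
// -> "https://example.github.io/token-vis/assets/models/qwen/"

// Relative lookups resolve against that base, so the sub-path survives:
const tok = new URL("tokenizer.json", base).href;
// -> "https://example.github.io/token-vis/assets/models/qwen/tokenizer.json"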
@@ -274,28 +275,52 @@
   return { setSources, load, drawBase, highlight };
 })();
 
-// --- Local Qwen tokenizer loader (
+// --- Local Qwen tokenizer loader (supports both JSON and BPE files) ---
 async function loadTokenizerQwenLocal() {
   resetProgress("Tokenizer");
   setStatus("Tokenizer: starting…");
 
-  const
+  const base = MODELS.qwen.base; // e.g. .../assets/models/qwen/
+  // New-format filenames
+  const tjson = new URL("tokenizer.json", base).href;
+  const tcfg  = new URL("tokenizer_config.json", base).href;
+  const smap  = new URL("special_tokens_map.json", base).href;
 
-
-  const
-  const
+  // Classic BPE filenames (present in many ONNX repos, incl. Qwen)
+  const vocab  = new URL("vocab.json", base).href;
+  const merges = new URL("merges.txt", base).href;
 
-
-
-
-      fetch(smapURL).then(r => r.ok ? r.json() : null),
-  ]);
+  // Try new-format first; if missing, fall back to vocab/merges
+  const tjsonHead = await fetch(tjson, { method: "HEAD" });
+  const useNewFormat = tjsonHead.ok;
 
   const files = new Map();
-
-
-
-
+
+  if (useNewFormat) {
+    const [tokJSON, tokCfgJSON, smapJSON] = await Promise.all([
+      fetch(tjson).then(r => { if (!r.ok) throw new Error("missing tokenizer.json"); return r.json(); }),
+      fetch(tcfg).then(r => { if (!r.ok) throw new Error("missing tokenizer_config.json"); return r.json(); }),
+      fetch(smap).then(r => r.ok ? r.json() : null),
+    ]);
+    files.set("tokenizer.json", new Blob([JSON.stringify(tokJSON)], { type: "application/json" }));
+    files.set("tokenizer_config.json", new Blob([JSON.stringify(tokCfgJSON)], { type: "application/json" }));
+    if (smapJSON) files.set("special_tokens_map.json", new Blob([JSON.stringify(smapJSON)], { type: "application/json" }));
+  } else {
+    // Fall back to the BPE pair; this avoids the "e.split is not a function" crash
+    const [vocabText, mergesText, tokCfgJSON, smapJSON] = await Promise.all([
+      fetch(vocab).then(r => { if (!r.ok) throw new Error("missing vocab.json"); return r.text(); }),
+      fetch(merges).then(r => { if (!r.ok) throw new Error("missing merges.txt"); return r.text(); }),
+      fetch(tcfg).then(r => r.ok ? r.json() : { model_max_length: 32768 }),
+      fetch(smap).then(r => r.ok ? r.json() : null),
+    ]);
+    files.set("vocab.json", new Blob([vocabText], { type: "application/json" }));
+    files.set("merges.txt", new Blob([mergesText], { type: "text/plain" }));
+    files.set("tokenizer_config.json",
+      new Blob([JSON.stringify(tokCfgJSON)], { type: "application/json" })
+    );
+    if (smapJSON) files.set("special_tokens_map.json",
+      new Blob([JSON.stringify(smapJSON)], { type: "application/json" })
+    );
   }
 
   tokenizer = await AutoTokenizer.from_pretrained("", {
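The loader gates on a single HEAD request to pick the file set. Note that fetch() rejects outright on network errors instead of returning a non-ok response, and some static hosts answer HEAD with 405, so a more defensive probe might look like the sketch below (urlExists is a hypothetical helper, not part of the page's code):

async function urlExists(url) {
  try {
    const r = await fetch(url, { method: "HEAD" });
    return r.ok;      // 2xx means the file is actually served
  } catch {
    return false;     // network failure: treat the file as missing
  }
}

// e.g. const useNewFormat = await urlExists(tjson);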
@@ -304,10 +329,7 @@
     progress_callback: onProgress,
   });
 }
-
-// --- Core model state ---
-let tokenizer = null, model = null;
-let loadSeq = 0;
+
 
 // Core model state (module scope)
 let tokenizer = null, model = null;
@@ -325,13 +347,10 @@
   setStatus("Tokenizer: starting…");
   try {
     if (key === "qwen") {
-      env.allowRemoteModels = false;
-
-        progress_callback: onProgress,
-        // local load uses env.localModelPath + "qwen"/files
-      });
+      env.allowRemoteModels = false; // force local for Qwen
+      await loadTokenizerQwenLocal(); // <-- use the local loader
     } else {
-      env.allowRemoteModels = true;
+      env.allowRemoteModels = true; // Hub is fine for distilgpt2
       tokenizer = await AutoTokenizer.from_pretrained(MODELS.distilgpt2.repo, {
         progress_callback: onProgress,
       });
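For the qwen branch to stay fully offline, the env flags have to agree with where the files actually live. A minimal sketch of the intended configuration, assuming env comes from the @xenova/transformers build the page already imports AutoTokenizer from (the localModelPath value is illustrative):

import { env } from "@xenova/transformers";

env.allowLocalModels  = true;             // permit same-origin file loads
env.localModelPath    = "assets/models/"; // so MODELS.qwen.id resolves under it
env.allowRemoteModels = false;            // qwen: never fall back to the Hub
// ...flipped back to true before loading distilgpt2 from the Hub.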
@@ -341,6 +360,7 @@
     setErr("Tokenizer failed to load.");
     return;
   }
+
   if (mySeq !== loadSeq) return;
 
   // --- Model ---
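The mySeq !== loadSeq check is the stale-load guard that the now-deduplicated loadSeq counter exists for: every load takes a ticket, and after each await it bails out if a newer load has started, so a superseded load cannot clobber newer state. A condensed sketch of the pattern (load and loadTokenizerFor are stand-ins for the page's real functions):

let loadSeq = 0;                     // module-scope ticket counter

async function load(key) {
  const mySeq = ++loadSeq;           // this call's ticket
  await loadTokenizerFor(key);       // slow step; user may switch models meanwhile
  if (mySeq !== loadSeq) return;     // a newer load superseded us: drop out
  // ...safe to continue with the model phase for this key...
}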