Update index.html
index.html  +24 −24
@@ -279,21 +279,30 @@
   resetProgress("Tokenizer");
   setStatus("Tokenizer: starting…");
 
-  …
+  // Always resolve from the current document URL and force the /qwen/ segment.
+  let base = new URL("assets/models/qwen/", document.baseURI).href;
+  if (!/\/qwen\/$/.test(base)) {
+    base = new URL("assets/models/qwen/", base).href;
+  }
 
-  // …
+  // Build exact file URLs
   const tjson = new URL("tokenizer.json", base).href;
   const tcfg  = new URL("tokenizer_config.json", base).href;
   const smap  = new URL("special_tokens_map.json", base).href;
   const merges= new URL("merges.txt", base).href;
 
-  // …
-  …
+  // DEBUG: verify what the browser will fetch
+  console.log("[Qwen tokenizer paths]", { base, tjson, tcfg, smap, merges });
+
+  // Detect tokenizer.json first
+  const hasTokenizerJSON = await fetch(tjson, { method: "HEAD" })
+    .then(r => r.ok)
+    .catch(() => false);
 
   if (hasTokenizerJSON) {
     const [tokJSON, tokCfgJSON, smapJSON] = await Promise.all([
-      fetch(tjson).then(r => { if (!r.ok) throw new Error("missing tokenizer.json"); …
-      fetch(tcfg ).then(r => { if (!r.ok) throw new Error("missing tokenizer_config.json"); …
+      fetch(tjson).then(r => { if (!r.ok) throw new Error("missing tokenizer.json"); return r.json(); }),
+      fetch(tcfg ).then(r => { if (!r.ok) throw new Error("missing tokenizer_config.json"); return r.json(); }),
       fetch(smap ).then(r => r.ok ? r.json() : null),
     ]);
 
@@ -310,24 +319,17 @@
     return;
   }
 
-  // …
-  const …
-  const mergesTxt = await …
-    if (!r.ok) throw new Error("missing …
-    return r.text();
-  …
-  …
-    .split(/\r?\n/)
-    .filter(line => line && !line.startsWith("#"));
-
-  const tokCfgJSON = await fetch(tcfg).then(r => {
-    if (!r.ok) throw new Error("missing tokenizer_config.json");
-    return r.json();
-  });
+  // Fallback: vocab.json + merges.txt (legacy BPE)
+  const vocabURL = new URL("vocab.json", base).href;
+  const [vocabTxt, mergesTxt, tokCfgJSON] = await Promise.all([
+    fetch(vocabURL).then(r => { if (!r.ok) throw new Error("missing vocab.json"); return r.text(); }),
+    fetch(merges  ).then(r => { if (!r.ok) throw new Error("missing merges.txt"); return r.text(); }),
+    fetch(tcfg    ).then(r => { if (!r.ok) throw new Error("missing tokenizer_config.json"); return r.json(); }),
+  ]);
 
   const files = new Map();
-  files.set("vocab.json", new Blob([…
-  files.set("merges.txt", new Blob([…
+  files.set("vocab.json", new Blob([vocabTxt], { type: "application/json" }));
+  files.set("merges.txt", new Blob([mergesTxt], { type: "text/plain" }));
   files.set("tokenizer_config.json", new Blob([JSON.stringify(tokCfgJSON)], { type: "application/json" }));
 
   tokenizer = await AutoTokenizer.from_pretrained("", {
@@ -337,8 +339,6 @@
   });
 }
 
-
-
 // Core model state (module scope)
 let tokenizer = null, model = null;
 let loadSeq = 0;
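Note on the detection pattern above (not part of the diff itself): the change probes for tokenizer.json with a HEAD request and falls back to the legacy vocab.json + merges.txt pair when the probe fails. Below is a minimal standalone sketch of that pattern using only the fetch and URL Web APIs; the helper name resolveTokenizerSource and its return shape are hypothetical and not taken from index.html.

// Sketch only: probe for tokenizer.json, otherwise fall back to legacy BPE files.
// Assumes `base` is an absolute URL string ending in a trailing slash.
async function resolveTokenizerSource(base) {
  const tjson = new URL("tokenizer.json", base).href;

  // A failed or rejected HEAD probe simply routes us to the fallback path.
  const hasTokenizerJSON = await fetch(tjson, { method: "HEAD" })
    .then(r => r.ok)
    .catch(() => false);

  if (hasTokenizerJSON) {
    // Fast path: a single tokenizer.json carries vocab, merges and config.
    const tokJSON = await fetch(tjson).then(r => {
      if (!r.ok) throw new Error("missing tokenizer.json");
      return r.json();
    });
    return { kind: "fast", tokJSON };
  }

  // Legacy BPE fallback: vocab.json is JSON text, merges.txt is plain text.
  const [vocabTxt, mergesTxt] = await Promise.all([
    fetch(new URL("vocab.json", base).href).then(r => {
      if (!r.ok) throw new Error("missing vocab.json");
      return r.text();
    }),
    fetch(new URL("merges.txt", base).href).then(r => {
      if (!r.ok) throw new Error("missing merges.txt");
      return r.text();
    }),
  ]);
  return { kind: "legacy", vocabTxt, mergesTxt };
}

One caveat with this style of probe: a static host that rejects HEAD (for example with a 405) makes the check report false even when tokenizer.json exists, so the code silently takes the legacy path. Probing with a ranged GET, or attempting the fast path and catching the failure, are possible alternatives.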