Update index.html
index.html  +24 −24
@@ -279,21 +279,30 @@
   resetProgress("Tokenizer");
   setStatus("Tokenizer: starting…");
 
-  …
+  // Always resolve from the current document URL and force the /qwen/ segment.
+  let base = new URL("assets/models/qwen/", document.baseURI).href;
+  if (!/\/qwen\/$/.test(base)) {
+    base = new URL("assets/models/qwen/", base).href;
+  }
 
-  // …
+  // Build exact file URLs
   const tjson = new URL("tokenizer.json", base).href;
   const tcfg  = new URL("tokenizer_config.json", base).href;
   const smap  = new URL("special_tokens_map.json", base).href;
   const merges= new URL("merges.txt", base).href;
 
-  // …
-  …
+  // DEBUG: verify what the browser will fetch
+  console.log("[Qwen tokenizer paths]", { base, tjson, tcfg, smap, merges });
+
+  // Detect tokenizer.json first
+  const hasTokenizerJSON = await fetch(tjson, { method: "HEAD" })
+    .then(r => r.ok)
+    .catch(() => false);
 
   if (hasTokenizerJSON) {
     const [tokJSON, tokCfgJSON, smapJSON] = await Promise.all([
-      fetch(tjson).then(r => { if (!r.ok) throw new Error("missing tokenizer.json"); …
-      fetch(tcfg ).then(r => { if (!r.ok) throw new Error("missing tokenizer_config.json"); …
+      fetch(tjson).then(r => { if (!r.ok) throw new Error("missing tokenizer.json"); return r.json(); }),
+      fetch(tcfg ).then(r => { if (!r.ok) throw new Error("missing tokenizer_config.json"); return r.json(); }),
       fetch(smap ).then(r => r.ok ? r.json() : null),
     ]);
 
@@ -310,24 +319,17 @@
     return;
   }
 
-  // …
-  const …
-  const mergesTxt = await …
-    if (!r.ok) throw new Error("missing …
-    return r.text();
-  …
-  …
-    .split(/\r?\n/)
-    .filter(line => line && !line.startsWith("#"));
-
-  const tokCfgJSON = await fetch(tcfg).then(r => {
-    if (!r.ok) throw new Error("missing tokenizer_config.json");
-    return r.json();
-  });
+  // Fallback: vocab.json + merges.txt (legacy BPE)
+  const vocabURL = new URL("vocab.json", base).href;
+  const [vocabTxt, mergesTxt, tokCfgJSON] = await Promise.all([
+    fetch(vocabURL).then(r => { if (!r.ok) throw new Error("missing vocab.json"); return r.text(); }),
+    fetch(merges  ).then(r => { if (!r.ok) throw new Error("missing merges.txt"); return r.text(); }),
+    fetch(tcfg    ).then(r => { if (!r.ok) throw new Error("missing tokenizer_config.json"); return r.json(); }),
+  ]);
 
   const files = new Map();
-  files.set("vocab.json", new Blob([…
-  files.set("merges.txt", new Blob([…
+  files.set("vocab.json", new Blob([vocabTxt], { type: "application/json" }));
+  files.set("merges.txt", new Blob([mergesTxt], { type: "text/plain" }));
   files.set("tokenizer_config.json", new Blob([JSON.stringify(tokCfgJSON)], { type: "application/json" }));
 
   tokenizer = await AutoTokenizer.from_pretrained("", {
@@ -337,8 +339,6 @@
   });
 }
 
-
-
 // Core model state (module scope)
 let tokenizer = null, model = null;
 let loadSeq = 0;
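Note on the detection pattern above (not part of the diff itself): the change probes for tokenizer.json with a HEAD request and falls back to the legacy vocab.json + merges.txt pair when the probe fails. Below is a minimal standalone sketch of that pattern using only the fetch and URL Web APIs; the helper name resolveTokenizerSource and its return shape are hypothetical and not taken from index.html.

// Sketch only: probe for tokenizer.json, otherwise fall back to legacy BPE files.
// Assumes `base` is an absolute URL string ending in a trailing slash.
async function resolveTokenizerSource(base) {
  const tjson = new URL("tokenizer.json", base).href;

  // A failed or rejected HEAD probe simply routes us to the fallback path.
  const hasTokenizerJSON = await fetch(tjson, { method: "HEAD" })
    .then(r => r.ok)
    .catch(() => false);

  if (hasTokenizerJSON) {
    // Fast path: a single tokenizer.json carries vocab, merges and config.
    const tokJSON = await fetch(tjson).then(r => {
      if (!r.ok) throw new Error("missing tokenizer.json");
      return r.json();
    });
    return { kind: "fast", tokJSON };
  }

  // Legacy BPE fallback: vocab.json is JSON text, merges.txt is plain text.
  const [vocabTxt, mergesTxt] = await Promise.all([
    fetch(new URL("vocab.json", base).href).then(r => {
      if (!r.ok) throw new Error("missing vocab.json");
      return r.text();
    }),
    fetch(new URL("merges.txt", base).href).then(r => {
      if (!r.ok) throw new Error("missing merges.txt");
      return r.text();
    }),
  ]);
  return { kind: "legacy", vocabTxt, mergesTxt };
}

One caveat with this style of probe: a static host that rejects HEAD (for example with a 405) makes the check report false even when tokenizer.json exists, so the code silently takes the legacy path. Probing with a ranged GET, or attempting the fast path and catching the failure, are possible alternatives.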