gpt2-tokenizer / index.html
julien-c's picture
julien-c HF staff
Complete demo
669a951 verified
raw
history blame
3.6 kB
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Pyodide demo: GPT-2 Tokenizer in your browser</title>
  <!--
    Both scripts are loaded synchronously (no defer/async) on purpose:
    the inline script at the end of the body calls loadPyodide() as soon as
    it runs, so pyodide.js must already have been evaluated by then.
  -->
  <script src="https://cdn.tailwindcss.com"></script>
  <script src="https://cdn.jsdelivr.net/pyodide/v0.19.1/full/pyodide.js"></script>
</head>
<body>
<!--
  Main demo UI. The class names js-init, js-clear, js-submt and js-code, the
  single input[type=text] and the single textarea are looked up by the inline
  script further down in this file; keep them in sync with that script.
-->
<main class="container mx-auto px-4">
  <h1 class="text-3xl font-bold">
    🚚 Pyodide demo
  </h1>
  <h2 class="text-gray">Python implementation of GPT-2 Tokenizer running inside your browser</h2>
  <div class="text-sm text-gray-800">Open your browser console to see Pyodide output</div>
  <!-- Live region: the script appends one progress line per initialization step. -->
  <div class="text-xs text-green-800 mt-4 js-init" role="status">Initialization: ...</div>
  <div class="flex gap-6 mt-10">
    <div class="flex-1 bg-gray-50 p-4 rounded-xl border border-gray-200/60">
      <!-- Visually hidden label: a placeholder alone is not an accessible name. -->
      <label class="sr-only" for="input-text">Sentence to tokenize</label>
      <input class="w-full py-3 px-6" id="input-text" type="text" value="This text is transformed into tokens" placeholder="Enter your sentence…">
      <div class="flex gap-4 my-4">
        <!-- focus-visible ring replaces the outline removed by focus:outline-none. -->
        <button class="js-clear bg-gray-50 flex-1 p-3 rounded font-semibold focus:outline-none focus-visible:ring-2 focus-visible:ring-indigo-500" type="button">Clear</button>
        <button class="js-submt bg-indigo-200 flex-1 p-3 rounded font-semibold focus:outline-none focus-visible:ring-2 focus-visible:ring-indigo-500" type="button">Submit</button>
      </div>
    </div>
    <div class="flex-1 bg-gray-50 p-4 rounded-xl border border-gray-200/60">
      <label class="sr-only" for="output-tokens">Token ids output</label>
      <textarea class="w-full py-3 px-6 font-mono" id="output-tokens" placeholder="Output"></textarea>
    </div>
  </div>
  <!-- h3 (not h4) so the heading outline does not skip a level after the h2 above. -->
  <h3 class="text-xs mt-10 mb-1">Python code being run:</h3>
  <pre class="js-code text-gray-500 text-xs bg-gray-50 p-4 rounded-xl border border-gray-200/60"></pre>
</main>
<script type="text/javascript">
// DOM handles used by the demo script, resolved once when the script runs.
// Order of the selectors matches the order of the destructured names.
const [divInit, btnClear, btnSubmt, inputField] = [
  ".js-init",
  ".js-clear",
  ".js-submt",
  "input[type=text]",
].map((selector) => document.querySelector(selector));

// Remote locations of the GPT-2 tokenizer assets downloaded during startup.
const URL_VOCAB = "https://huggingface.co/gpt2/resolve/main/vocab.json";
const URL_MERGES = "https://huggingface.co/gpt2/resolve/main/merges.txt";
/**
 * Demo entry point.
 *
 * Downloads the vocab, merges and ./encoder.py, boots Pyodide, builds the
 * Python `encoder` object, then wires the input field and the two buttons so
 * that the textarea always shows the space-separated token ids for the
 * current input.
 *
 * Any failure during startup is logged to the console and appended to the
 * init status element instead of leaving the page silently stuck.
 */
(async function main() {
  /** Appends one progress line ("<br> message") to the init status element. */
  const logStep = (message) => {
    // Append nodes rather than using `innerHTML +=`, which would re-parse the
    // existing children and would interpret the message as markup.
    divInit.append(document.createElement("br"), ` ${message}`);
  };

  /**
   * Fetches `url` and resolves to its body as text.
   * Throws if the server answers with a non-2xx status, so that an error
   * page is never mistaken for the requested file.
   */
  const fetchText = async (url) => {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`Failed to download ${url}: HTTP ${response.status} ${response.statusText}`);
    }
    return response.text();
  };

  let pyodide;
  try {
    const vocab = await fetchText(URL_VOCAB);
    logStep(`Downloaded vocab from ${URL_VOCAB}`);
    const merges = await fetchText(URL_MERGES);
    logStep(`Downloaded merges from ${URL_MERGES}`);
    const py_code = await fetchText("./encoder.py");
    document.querySelector(".js-code").textContent = py_code;
    logStep(`Downloaded python code from present repo`);

    pyodide = await loadPyodide({
      indexURL: "https://cdn.jsdelivr.net/pyodide/v0.19.1/full/",
    });
    logStep(`Initialized Pyodide`);

    // Load whatever packages the Python file imports before executing it.
    await pyodide.loadPackagesFromImports(py_code);
    pyodide.runPython(py_code);
    // Hand the downloaded assets to Python as globals, then build the encoder.
    pyodide.globals.set("vocab", vocab);
    pyodide.globals.set("merges", merges);
    pyodide.runPython(`encoder = get_encoder_from_strings(vocab, merges)`);
    logStep(`Initialized tokenizer`);
  } catch (err) {
    console.error(err);
    logStep(`Initialization failed: ${err && err.message ? err.message : err}`);
    return;
  }

  const outputField = document.querySelector("textarea");

  /** Encodes the current input value and writes the token ids to the textarea. */
  const compute = () => {
    // Pass the text as a Python global (same mechanism as vocab/merges above)
    // instead of splicing user input into Python source code.
    pyodide.globals.set("_demo_input_text", inputField.value);
    const result = pyodide.runPython(`encoder.encode(_demo_input_text)`);
    try {
      /// ^ iterable of ints
      outputField.value = Array.from(result).join(" ");
    } finally {
      // A non-primitive Python return value comes back as a proxy object that
      // must be released explicitly; otherwise one would leak per keystroke.
      if (result && typeof result.destroy === "function") {
        result.destroy();
      }
    }
  };

  btnSubmt.addEventListener("click", compute);
  inputField.addEventListener("input", compute);
  // Tokenize the pre-filled example sentence right away.
  btnSubmt.click();
  btnClear.addEventListener("click", () => {
    inputField.value = "";
    compute();
  });

  // Focus the input with the caret placed after the existing text.
  inputField.focus();
  inputField.selectionStart = inputField.selectionEnd = inputField.value.length;
})();
</script>
</body>
</html>