radames's picture
Upload 6 files
f9fe51a
raw
history blame
18.9 kB
<html>
<head>
<meta content="text/html;charset=utf-8" http-equiv="Content-Type" />
<title>Candle Phi 1.5 / Phi 2.0 Rust/WASM</title>
</head>
<body></body>
</html>
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<link
rel="stylesheet"
href="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/styles/default.min.css"
/>
<style>
@import url("https://fonts.googleapis.com/css2?family=Source+Code+Pro:wght@200;300;400&family=Source+Sans+3:wght@100;200;300;400;500;600;700;800;900&display=swap");
html,
body {
font-family: "Source Sans 3", sans-serif;
}
code,
output,
select,
pre {
font-family: "Source Code Pro", monospace;
}
</style>
<style type="text/tailwindcss">
.link {
@apply underline hover:text-blue-500 hover:no-underline;
}
</style>
<script src="https://cdn.tailwindcss.com"></script>
<script type="module">
import snarkdown from "https://cdn.skypack.dev/snarkdown";
import hljs from "https://cdn.skypack.dev/highlight.js";
// models base url
const MODELS = {
phi_1_5_q4k: {
base_url:
"https://huggingface.co/lmz/candle-quantized-phi/resolve/main/",
model: "model-q4k.gguf",
tokenizer: "tokenizer.json",
config: "phi-1_5.json",
quantized: true,
seq_len: 2048,
size: "800 MB",
},
phi_1_5_q80: {
base_url:
"https://huggingface.co/lmz/candle-quantized-phi/resolve/main/",
model: "model-q80.gguf",
tokenizer: "tokenizer.json",
config: "phi-1_5.json",
quantized: true,
seq_len: 2048,
size: "1.51 GB",
},
phi_2_0_q4k: {
base_url:
"https://huggingface.co/radames/phi-2-quantized/resolve/main/",
model: ["model-v2-q4k.gguf_aa.part", "model-v2-q4k.gguf_ab.part", "model-v2-q4k.gguf_ac.part"],
tokenizer: "tokenizer.json",
config: "config.json",
quantized: true,
seq_len: 2048,
size: "1.57GB",
},
puffin_phi_v2_q4k: {
base_url:
"https://huggingface.co/lmz/candle-quantized-phi/resolve/main/",
model: "model-puffin-phi-v2-q4k.gguf",
tokenizer: "tokenizer-puffin-phi-v2.json",
config: "puffin-phi-v2.json",
quantized: true,
seq_len: 2048,
size: "798 MB",
},
puffin_phi_v2_q80: {
base_url:
"https://huggingface.co/lmz/candle-quantized-phi/resolve/main/",
model: "model-puffin-phi-v2-q80.gguf",
tokenizer: "tokenizer-puffin-phi-v2.json",
config: "puffin-phi-v2.json",
quantized: true,
seq_len: 2048,
size: "1.50 GB",
},
};
const TEMPLATES = [
{
title: "Simple prompt",
prompt: `Sebastien is in London today, it’s the middle of July yet it’s raining, so Sebastien is feeling gloomy. He`,
},
{
title: "Think step by step",
prompt: `Suppose Alice originally had 3 apples, then Bob gave Alice 7 apples, then Alice gave Cook 5 apples, and then Tim gave Alice 3x the amount of apples Alice had. How many apples does Alice have now?
Let’s think step by step.`,
},
{
title: "Explaing a code snippet",
prompt: `What does this script do?
\`\`\`python
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind(('', 0))
s.listen(1)
conn, addr = s.accept()
print('Connected by', addr)
return conn.getsockname()[1]
\`\`\`
Let’s think step by step.`,
},
{
title: "Question answering",
prompt: `Instruct: What is the capital of France?
Output:`,
},
{
title: "Chat mode",
prompt: `Alice: Can you tell me how to create a python application to go through all the files
in one directory where the file’s name DOES NOT end with '.json'?
Bob:`,
},
{
title: "Python code completion",
prompt: `"""write a python function called batch(function, list) which call function(x) for x in
list in parallel"""
Solution:`,
},
{
title: "Python Sample",
prompt: `"""Can you make sure those histograms appear side by side on the same plot:
\`\`\`python
plt.hist(intreps_retrained[0][1].view(64,-1).norm(dim=1).detach().cpu().numpy(), bins = 20)
plt.hist(intreps_pretrained[0][1].view(64,-1).norm(dim=1).detach().cpu().numpy(), bins = 20)
\`\`\`
"""`,
},
{
title: "Write a Twitter post",
prompt: `Write a twitter post for the discovery of gravitational wave.
Twitter Post:`,
},
{
title: "Write a review",
prompt: `Write a polite review complaining that the video game 'Random Game' was too badly optimized and it burned my laptop.
Very polite review:`,
},
];
const phiWorker = new Worker("./phiWorker.js", {
type: "module",
});
async function generateSequence(controller) {
const getValue = (id) => document.querySelector(`#${id}`).value;
const modelID = getValue("model");
const model = MODELS[modelID];
const weightsURL = model.model instanceof Array ? model.model.map((m) => model.base_url + m) : model.base_url + model.model;
const tokenizerURL = model.base_url + model.tokenizer;
const configURL = model.base_url + model.config;
const prompt = getValue("prompt").trim();
const temperature = getValue("temperature");
const topP = getValue("top-p");
const repeatPenalty = getValue("repeat_penalty");
const seed = getValue("seed");
const maxSeqLen = getValue("max-seq");
function updateStatus(data) {
const outStatus = document.querySelector("#output-status");
const outGen = document.querySelector("#output-generation");
const outCounter = document.querySelector("#output-counter");
switch (data.status) {
case "loading":
outStatus.hidden = false;
outStatus.textContent = data.message;
outGen.hidden = true;
outCounter.hidden = true;
break;
case "generating":
const { message, prompt, sentence, tokensSec, totalTime } = data;
outStatus.hidden = true;
outCounter.hidden = false;
outGen.hidden = false;
outGen.innerHTML = snarkdown(prompt + sentence);
outCounter.innerHTML = `${(totalTime / 1000).toFixed(
2
)}s (${tokensSec.toFixed(2)} tok/s)`;
hljs.highlightAll();
break;
case "complete":
outStatus.hidden = true;
outGen.hidden = false;
break;
}
}
return new Promise((resolve, reject) => {
phiWorker.postMessage({
weightsURL,
modelID,
tokenizerURL,
configURL,
quantized: model.quantized,
prompt,
temp: temperature,
top_p: topP,
repeatPenalty,
seed: seed,
maxSeqLen,
command: "start",
});
const handleAbort = () => {
phiWorker.postMessage({ command: "abort" });
};
const handleMessage = (event) => {
const { status, error, message, prompt, sentence } = event.data;
if (status) updateStatus(event.data);
if (error) {
phiWorker.removeEventListener("message", handleMessage);
reject(new Error(error));
}
if (status === "aborted") {
phiWorker.removeEventListener("message", handleMessage);
resolve(event.data);
}
if (status === "complete") {
phiWorker.removeEventListener("message", handleMessage);
resolve(event.data);
}
};
controller.signal.addEventListener("abort", handleAbort);
phiWorker.addEventListener("message", handleMessage);
});
}
const form = document.querySelector("#form");
const prompt = document.querySelector("#prompt");
const clearBtn = document.querySelector("#clear-btn");
const runBtn = document.querySelector("#run");
const modelSelect = document.querySelector("#model");
const promptTemplates = document.querySelector("#prompt-templates");
let runController = new AbortController();
let isRunning = false;
document.addEventListener("DOMContentLoaded", () => {
for (const [id, model] of Object.entries(MODELS)) {
const option = document.createElement("option");
option.value = id;
option.innerText = `${id} (${model.size})`;
modelSelect.appendChild(option);
}
const query = new URLSearchParams(window.location.search);
const modelID = query.get("model");
if (modelID) {
modelSelect.value = modelID;
} else {
modelSelect.value = "phi_1_5_q4k";
}
for (const [i, { title, prompt }] of TEMPLATES.entries()) {
const div = document.createElement("div");
const input = document.createElement("input");
input.type = "radio";
input.name = "task";
input.id = `templates-${i}`;
input.classList.add("font-light", "cursor-pointer");
input.value = prompt;
const label = document.createElement("label");
label.htmlFor = `templates-${i}`;
label.classList.add("cursor-pointer");
label.innerText = title;
div.appendChild(input);
div.appendChild(label);
promptTemplates.appendChild(div);
}
});
promptTemplates.addEventListener("change", (e) => {
const template = e.target.value;
prompt.value = template;
prompt.style.height = "auto";
prompt.style.height = prompt.scrollHeight + "px";
});
modelSelect.addEventListener("change", (e) => {
const query = new URLSearchParams(window.location.search);
query.set("model", e.target.value);
window.history.replaceState(
{ },
"",
`${window.location.pathname}?${query}`
);
window.parent.postMessage({queryString: "?" + query }, "*")
const model = MODELS[e.target.value];
document.querySelector("#max-seq").max = model.seq_len;
document.querySelector("#max-seq").nextElementSibling.value = 200;
});
form.addEventListener("submit", async (e) => {
e.preventDefault();
if (isRunning) {
stopRunning();
} else {
startRunning();
await generateSequence(runController);
stopRunning();
}
});
function startRunning() {
isRunning = true;
runBtn.textContent = "Stop";
}
function stopRunning() {
runController.abort();
runController = new AbortController();
runBtn.textContent = "Run";
isRunning = false;
}
clearBtn.addEventListener("click", (e) => {
e.preventDefault();
prompt.value = "";
clearBtn.classList.add("invisible");
runBtn.disabled = true;
stopRunning();
});
prompt.addEventListener("input", (e) => {
runBtn.disabled = false;
if (e.target.value.length > 0) {
clearBtn.classList.remove("invisible");
} else {
clearBtn.classList.add("invisible");
}
});
</script>
</head>
<body class="container max-w-4xl mx-auto p-4 text-gray-800">
<main class="grid grid-cols-1 gap-8 relative">
<span class="absolute text-5xl -ml-[1em]"> 🕯️ </span>
<div>
<h1 class="text-5xl font-bold">Candle Phi 1.5 / Phi 2.0 </h1>
<h2 class="text-2xl font-bold">Rust/WASM Demo</h2>
<p class="max-w-lg">
The
<a href="https://huggingface.co/microsoft/phi-1_5" class="link" target="_blank">Phi-1.5</a> and
<a href="https://huggingface.co/microsoft/phi-2" class="link" target="_blank">Phi-2</a> models achieves
state-of-the-art performance with only 1.3 billion and 2.7 billion parameters, compared to larger models with up
to 13
billion parameters. Here you can try the quantized versions. Additional prompt examples are
available in the
<a href="https://arxiv.org/pdf/2309.05463.pdf#page=8" class="link" target="_blank">
technical report </a>.
</p>
<p class="max-w-lg">
You can also try
<a href="https://huggingface.co/teknium/Puffin-Phi-v2" class="link" target="_blank">Puffin-Phi V2
</a>
quantized version, a fine-tuned version of Phi-1.5 on the
<a href="https://huggingface.co/datasets/LDJnr/Puffin" class="link" target="_blank">Puffin dataset
</a>
</p>
</div>
<div>
<p class="text-xs italic max-w-lg">
<b>Note:</b>
When first run, the app will download and cache the model, which could
take a few minutes. The models are <b>~800MB</b> or <b>~1.51GB</b> in
size.
</p>
</div>
<div>
<label for="model" class="font-medium">Models Options: </label>
<select
id="model"
class="border-2 border-gray-500 rounded-md font-light"
></select>
</div>
<div>
<h3 class="font-medium">Prompt Templates</h3>
<form id="prompt-templates" class="flex flex-col gap-1 my-2"></form>
</div>
<form
id="form"
class="flex text-normal px-1 py-1 border border-gray-700 rounded-md items-center"
>
<input type="submit" hidden />
<textarea
type="text"
id="prompt"
class="font-light w-full px-3 py-2 mx-1 resize-none outline-none"
oninput="this.style.height = 0;this.style.height = this.scrollHeight + 'px'"
placeholder="Add your prompt here..."
>
Instruct: Write a detailed analogy between mathematics and a lighthouse.
Output:</textarea
>
<button id="clear-btn">
<svg
fill="none"
xmlns="http://www.w3.org/2000/svg"
width="40"
viewBox="0 0 70 40"
>
<path opacity=".5" d="M39 .2v40.2" stroke="#1F2937" />
<path
d="M1.5 11.5 19 29.1m0-17.6L1.5 29.1"
opacity=".5"
stroke="#1F2937"
stroke-width="2"
/>
</svg>
</button>
<button
id="run"
class="bg-gray-700 hover:bg-gray-800 text-white font-normal py-2 w-16 rounded disabled:bg-gray-300 disabled:cursor-not-allowed"
>
Run
</button>
</form>
<details>
<summary class="font-medium cursor-pointer">Advanced Options</summary>
<div class="grid grid-cols-3 max-w-md items-center gap-3 py-3">
<label class="text-sm font-medium" for="max-seq"
>Maximum length
</label>
<input
type="range"
id="max-seq"
name="max-seq"
min="1"
max="2048"
step="1"
value="200"
oninput="this.nextElementSibling.value = Number(this.value)"
/>
<output
class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md"
>
200</output
>
<label class="text-sm font-medium" for="temperature"
>Temperature</label
>
<input
type="range"
id="temperature"
name="temperature"
min="0"
max="2"
step="0.01"
value="0.00"
oninput="this.nextElementSibling.value = Number(this.value).toFixed(2)"
/>
<output
class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md"
>
0.00</output
>
<label class="text-sm font-medium" for="top-p">Top-p</label>
<input
type="range"
id="top-p"
name="top-p"
min="0"
max="1"
step="0.01"
value="1.00"
oninput="this.nextElementSibling.value = Number(this.value).toFixed(2)"
/>
<output
class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md"
>
1.00</output
>
<label class="text-sm font-medium" for="repeat_penalty"
>Repeat Penalty</label
>
<input
type="range"
id="repeat_penalty"
name="repeat_penalty"
min="1"
max="2"
step="0.01"
value="1.10"
oninput="this.nextElementSibling.value = Number(this.value).toFixed(2)"
/>
<output
class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md"
>1.10</output
>
<label class="text-sm font-medium" for="seed">Seed</label>
<input
type="number"
id="seed"
name="seed"
value="299792458"
class="font-light border border-gray-700 text-right rounded-md p-2"
/>
<button
id="run"
onclick="document.querySelector('#seed').value = Math.floor(Math.random() * Number.MAX_SAFE_INTEGER)"
class="bg-gray-700 hover:bg-gray-800 text-white font-normal py-1 w-[50px] rounded disabled:bg-gray-300 disabled:cursor-not-allowed text-sm"
>
Rand
</button>
</div>
</details>
<div>
<h3 class="font-medium">Generation:</h3>
<div
class="min-h-[250px] bg-slate-100 text-gray-500 p-4 rounded-md flex flex-col gap-2"
>
<div
id="output-counter"
hidden
class="ml-auto font-semibold grid-rows-1 text-sm"
></div>
<p hidden id="output-generation" class="grid-rows-2"></p>
<span id="output-status" class="m-auto font-light"
>No output yet</span
>
</div>
</div>
</main>
</body>
</html>