Update index.html
Browse files- index.html +135 -135
index.html
CHANGED
|
@@ -87,148 +87,138 @@
|
|
| 87 |
|
| 88 |
<div id="results"></div>
|
| 89 |
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
this.processor = null;
|
| 108 |
-
this.model_id = modelId;
|
| 109 |
-
}
|
| 110 |
-
if (!this.processor) {
|
| 111 |
-
this.processor = await AutoProcessor.from_pretrained(modelId);
|
| 112 |
-
}
|
| 113 |
-
if (!this.model) {
|
| 114 |
-
this.model = await AutoModelForVision2Seq.from_pretrained(modelId, {
|
| 115 |
-
dtype: {
|
| 116 |
-
embed_tokens: dtypeSettings.embed,
|
| 117 |
-
vision_encoder: dtypeSettings.vision,
|
| 118 |
-
decoder_model_merged: dtypeSettings.decoder,
|
| 119 |
-
},
|
| 120 |
-
device: device,
|
| 121 |
-
});
|
| 122 |
-
}
|
| 123 |
-
return [this.processor, this.model];
|
| 124 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
const
|
| 147 |
-
const
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
const [processor, model] = await SmolVLM.getInstance(modelId, dtypeSettings, device);
|
| 166 |
-
const messages = [{
|
| 167 |
-
role: "user",
|
| 168 |
-
content: [
|
| 169 |
-
{ type: "image" },
|
| 170 |
-
{ type: "text", text: "Can you describe this image?" },
|
| 171 |
-
],
|
| 172 |
-
}];
|
| 173 |
const text = processor.apply_chat_template(messages, { add_generation_prompt: true });
|
| 174 |
const inputs = await processor(text, [image], { do_image_splitting: doImageSplitting });
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
await model.generate({
|
| 176 |
...inputs,
|
| 177 |
-
max_new_tokens:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
});
|
| 179 |
-
} catch (e) {
|
| 180 |
-
overallResultsHtml += "<p>Error during warmup: " + e.toString() + "</p></div>";
|
| 181 |
-
continue;
|
| 182 |
-
}
|
| 183 |
-
|
| 184 |
-
// Benchmark runs using streaming generation.
|
| 185 |
-
let totalTime = 0;
|
| 186 |
-
let totalTps = 0;
|
| 187 |
-
let runsResults = [];
|
| 188 |
-
for (let i = 0; i < numRuns; ++i) {
|
| 189 |
-
try {
|
| 190 |
-
const [processor, model] = await SmolVLM.getInstance(modelId, dtypeSettings, device);
|
| 191 |
-
const messages = [{
|
| 192 |
-
role: "user",
|
| 193 |
-
content: [
|
| 194 |
-
{ type: "image" },
|
| 195 |
-
{ type: "text", text: "Can you describe this image?" },
|
| 196 |
-
],
|
| 197 |
-
}];
|
| 198 |
-
const text = processor.apply_chat_template(messages, { add_generation_prompt: true });
|
| 199 |
-
const inputs = await processor(text, [image], { do_image_splitting: doImageSplitting });
|
| 200 |
-
let startTime, numTokens = 0, tps = 0;
|
| 201 |
-
const token_callback_function = () => {
|
| 202 |
-
startTime = startTime || performance.now();
|
| 203 |
-
tps = (numTokens++ / (performance.now() - startTime)) * 1000;
|
| 204 |
-
};
|
| 205 |
-
const streamer = new TextStreamer(processor.tokenizer, {
|
| 206 |
-
skip_prompt: true,
|
| 207 |
-
skip_special_tokens: true,
|
| 208 |
-
token_callback_function,
|
| 209 |
-
});
|
| 210 |
-
const generateStartTime = performance.now();
|
| 211 |
-
await model.generate({
|
| 212 |
-
...inputs,
|
| 213 |
-
max_new_tokens: maxTokens,
|
| 214 |
-
min_new_tokens: maxTokens,
|
| 215 |
-
streamer,
|
| 216 |
-
});
|
| 217 |
-
const endTime = performance.now();
|
| 218 |
-
const elapsed = endTime - generateStartTime;
|
| 219 |
-
totalTime += elapsed;
|
| 220 |
-
totalTps += tps;
|
| 221 |
-
runsResults.push({
|
| 222 |
-
run: i + 1,
|
| 223 |
-
time: elapsed.toFixed(2),
|
| 224 |
-
tps: tps.toFixed(2)
|
| 225 |
-
});
|
| 226 |
-
} catch (e) {
|
| 227 |
-
runsResults.push({ run: i + 1, time: "Error", tps: "Error" });
|
| 228 |
-
}
|
| 229 |
}
|
|
|
|
| 230 |
const avgTime = (totalTime / numRuns).toFixed(2);
|
| 231 |
const avgTps = (totalTps / numRuns).toFixed(2);
|
|
|
|
|
|
|
|
|
|
| 232 |
let tableHtml = "<table>";
|
| 233 |
tableHtml += "<tr><th>Run</th><th>Execution Time (ms)</th><th>Tokens per Second</th></tr>";
|
| 234 |
runsResults.forEach(r => {
|
|
@@ -236,13 +226,23 @@
|
|
| 236 |
});
|
| 237 |
tableHtml += `<tr><td><strong>Average</strong></td><td><strong>${avgTime}</strong></td><td><strong>${avgTps}</strong></td></tr>`;
|
| 238 |
tableHtml += "</table>";
|
| 239 |
-
|
|
|
|
|
|
|
|
|
|
| 240 |
}
|
| 241 |
-
resultsDiv.innerHTML = overallResultsHtml;
|
| 242 |
}
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
</body>
|
| 247 |
</html>
|
| 248 |
|
|
|
|
| 87 |
|
| 88 |
<div id="results"></div>
|
| 89 |
|
| 90 |
+
<script type="module">
|
| 91 |
+
import {
|
| 92 |
+
AutoProcessor,
|
| 93 |
+
AutoModelForVision2Seq,
|
| 94 |
+
load_image,
|
| 95 |
+
TextStreamer,
|
| 96 |
+
} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.4.2";
|
| 97 |
+
|
| 98 |
+
/**
 * Lazily loads and caches a single processor/model pair.
 *
 * Only one model is kept at a time. The cached model is rebuilt whenever the
 * model id, any of the per-module dtypes, or the device differs from the
 * values it was created with; the processor is rebuilt only when the model id
 * changes, because it is loaded from the model id alone.
 */
class SmolVLM {
  static model = null;
  static processor = null;
  static model_id = null;
  // Serialized dtype/device settings the cached model was created with.
  static config_key = null;

  /**
   * Return the cached processor and model, loading them when needed.
   *
   * @param {string} modelId - Identifier passed to `from_pretrained`.
   * @param {{embed: string, vision: string, decoder: string}} dtypeSettings -
   *   dtypes forwarded as `embed_tokens`, `vision_encoder` and
   *   `decoder_model_merged` respectively.
   * @param {string} device - Device forwarded to the model loader.
   * @returns {Promise<Array>} `[processor, model]`.
   */
  static async getInstance(modelId, dtypeSettings, device) {
    const configKey = JSON.stringify([
      dtypeSettings?.embed,
      dtypeSettings?.vision,
      dtypeSettings?.decoder,
      device,
    ]);
    const isNewModelId = this.model_id !== modelId;

    // A model built with other dtypes or on another device must not be
    // reused: a benchmark would silently measure the previous configuration.
    if (isNewModelId || this.config_key !== configKey) {
      const staleModel = this.model;
      this.model = null;
      if (isNewModelId) {
        this.processor = null;
      }
      this.model_id = modelId;
      this.config_key = configKey;
      if (staleModel) {
        // Release the replaced model's inference sessions instead of waiting
        // for garbage collection; the cache is already cleared above, so a
        // failure here still leaves a consistent state for the next call.
        await staleModel.dispose();
      }
    }

    if (!this.processor) {
      this.processor = await AutoProcessor.from_pretrained(modelId);
    }
    if (!this.model) {
      this.model = await AutoModelForVision2Seq.from_pretrained(modelId, {
        dtype: {
          embed_tokens: dtypeSettings.embed,
          vision_encoder: dtypeSettings.vision,
          decoder_model_merged: dtypeSettings.decoder,
        },
        device: device,
      });
    }
    return [this.processor, this.model];
  }
}
|
| 124 |
+
|
| 125 |
+
async function runBenchmark() {
|
| 126 |
+
document.getElementById("model-options").disabled = true;
|
| 127 |
+
document.getElementById("hardware-options").disabled = true;
|
| 128 |
+
const resultsDiv = document.getElementById("results");
|
| 129 |
+
resultsDiv.innerHTML = "";
|
| 130 |
+
|
| 131 |
+
const modelIds = [
|
| 132 |
+
"HuggingFaceTB/SmolVLM-256M-Instruct",
|
| 133 |
+
"HuggingFaceTB/SmolVLM-500M-Instruct",
|
| 134 |
+
"HuggingFaceTB/SmolVLM-Instruct"
|
| 135 |
+
];
|
| 136 |
|
| 137 |
+
const decoder_dtype = document.getElementById("decoder-dtype").value || "fp32";
|
| 138 |
+
const embed_dtype = document.getElementById("embed-dtype").value || "fp32";
|
| 139 |
+
const vision_dtype = document.getElementById("vision-dtype").value || "fp32";
|
| 140 |
+
const device = document.getElementById("device").value;
|
| 141 |
+
const imageUrl = document.getElementById("image-url").value;
|
| 142 |
+
const maxTokens = parseInt(document.getElementById("max-tokens").value) || 128;
|
| 143 |
+
const numRuns = parseInt(document.getElementById("num-runs").value) || 5;
|
| 144 |
+
const doImageSplitting = document.getElementById("do-split").checked;
|
| 145 |
+
|
| 146 |
+
const dtypeSettings = { decoder: decoder_dtype, embed: embed_dtype, vision: vision_dtype };
|
| 147 |
+
const image = await load_image(imageUrl);
|
| 148 |
+
|
| 149 |
+
for (const modelId of modelIds) {
|
| 150 |
+
const modelShortName = modelId.split("/").pop();
|
| 151 |
+
const modelSection = document.createElement("div");
|
| 152 |
+
modelSection.className = "model-results";
|
| 153 |
+
modelSection.innerHTML = `<h2>Benchmarking ${modelShortName}</h2><p id="status-${modelShortName}">Loading...</p><pre id="bar-${modelShortName}">▯▯▯▯▯</pre>`;
|
| 154 |
+
resultsDiv.appendChild(modelSection);
|
| 155 |
+
|
| 156 |
+
const status = document.getElementById(`status-${modelShortName}`);
|
| 157 |
+
const bar = document.getElementById(`bar-${modelShortName}`);
|
| 158 |
+
|
| 159 |
+
try {
|
| 160 |
+
status.innerText = "Loading processor and model...";
|
| 161 |
+
const [processor, model] = await SmolVLM.getInstance(modelId, dtypeSettings, device);
|
| 162 |
+
|
| 163 |
+
status.innerText = "Warming up...";
|
| 164 |
+
const messages = [{
|
| 165 |
+
role: "user",
|
| 166 |
+
content: [
|
| 167 |
+
{ type: "image" },
|
| 168 |
+
{ type: "text", text: "Can you describe this image?" },
|
| 169 |
+
],
|
| 170 |
+
}];
|
| 171 |
+
const text = processor.apply_chat_template(messages, { add_generation_prompt: true });
|
| 172 |
+
const inputs = await processor(text, [image], { do_image_splitting: doImageSplitting });
|
| 173 |
+
await model.generate({ ...inputs, max_new_tokens: 1 });
|
| 174 |
+
|
| 175 |
+
let totalTime = 0;
|
| 176 |
+
let totalTps = 0;
|
| 177 |
+
let runsResults = [];
|
| 178 |
+
|
| 179 |
+
for (let i = 0; i < numRuns; ++i) {
|
| 180 |
+
status.innerText = `Running benchmark... (${i + 1}/${numRuns})`;
|
| 181 |
+
bar.innerText = createProgressBar(i + 1, numRuns);
|
| 182 |
+
const start = performance.now();
|
| 183 |
const [processor, model] = await SmolVLM.getInstance(modelId, dtypeSettings, device);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
const text = processor.apply_chat_template(messages, { add_generation_prompt: true });
|
| 185 |
const inputs = await processor(text, [image], { do_image_splitting: doImageSplitting });
|
| 186 |
+
|
| 187 |
+
let numTokens = 0;
|
| 188 |
+
let startTime;
|
| 189 |
+
let tps = 0;
|
| 190 |
+
const token_callback_function = () => {
|
| 191 |
+
startTime = startTime || performance.now();
|
| 192 |
+
tps = (numTokens++ / (performance.now() - startTime)) * 1000;
|
| 193 |
+
};
|
| 194 |
+
const streamer = new TextStreamer(processor.tokenizer, {
|
| 195 |
+
skip_prompt: true,
|
| 196 |
+
skip_special_tokens: true,
|
| 197 |
+
token_callback_function,
|
| 198 |
+
});
|
| 199 |
+
|
| 200 |
await model.generate({
|
| 201 |
...inputs,
|
| 202 |
+
max_new_tokens: maxTokens,
|
| 203 |
+
min_new_tokens: maxTokens,
|
| 204 |
+
streamer,
|
| 205 |
+
});
|
| 206 |
+
|
| 207 |
+
const elapsed = performance.now() - start;
|
| 208 |
+
totalTime += elapsed;
|
| 209 |
+
totalTps += tps;
|
| 210 |
+
runsResults.push({
|
| 211 |
+
run: i + 1,
|
| 212 |
+
time: elapsed.toFixed(2),
|
| 213 |
+
tps: tps.toFixed(2)
|
| 214 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
}
|
| 216 |
+
|
| 217 |
const avgTime = (totalTime / numRuns).toFixed(2);
|
| 218 |
const avgTps = (totalTps / numRuns).toFixed(2);
|
| 219 |
+
status.innerText = "✅ Done!";
|
| 220 |
+
bar.innerText = createProgressBar(numRuns, numRuns);
|
| 221 |
+
|
| 222 |
let tableHtml = "<table>";
|
| 223 |
tableHtml += "<tr><th>Run</th><th>Execution Time (ms)</th><th>Tokens per Second</th></tr>";
|
| 224 |
runsResults.forEach(r => {
|
|
|
|
| 226 |
});
|
| 227 |
tableHtml += `<tr><td><strong>Average</strong></td><td><strong>${avgTime}</strong></td><td><strong>${avgTps}</strong></td></tr>`;
|
| 228 |
tableHtml += "</table>";
|
| 229 |
+
modelSection.innerHTML += tableHtml;
|
| 230 |
+
|
| 231 |
+
} catch (e) {
|
| 232 |
+
status.innerText = "❌ Error: " + e.toString();
|
| 233 |
}
|
|
|
|
| 234 |
}
|
| 235 |
+
}
|
| 236 |
+
|
| 237 |
+
/**
 * Render a text progress bar made of filled (▮) and empty (▯) cells.
 *
 * Both counts are truncated to integers and clamped so that the bar always
 * has exactly `total` cells (zero when `total` is negative or not a number)
 * and the filled part never exceeds it. Without the clamp a negative count
 * would reach `String.prototype.repeat`, which throws a RangeError.
 *
 * @param {number} current - Number of completed steps.
 * @param {number} total - Total number of steps.
 * @returns {string} `current` filled cells followed by the remaining empty ones.
 */
function createProgressBar(current, total) {
  const cellCount = Math.max(0, Math.trunc(total) || 0);
  const filledCount = Math.min(cellCount, Math.max(0, Math.trunc(current) || 0));
  return "▮".repeat(filledCount) + "▯".repeat(cellCount - filledCount);
}
|
| 242 |
+
|
| 243 |
+
document.getElementById("start-benchmark").addEventListener("click", runBenchmark);
|
| 244 |
+
</script>
|
| 245 |
+
|
| 246 |
</body>
|
| 247 |
</html>
|
| 248 |
|