andito (HF Staff) committed (verified)
Commit 68bfa8d · Parent: d09721a

Update index.html

Files changed (1): index.html (+135 -135)
index.html CHANGED
@@ -87,148 +87,138 @@
 
     <div id="results"></div>
 
-    <script type="module">
-      import {
-        AutoProcessor,
-        AutoModelForVision2Seq,
-        load_image,
-        TextStreamer,
-      } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.4.2";
-
-      // Updated SmolVLM class to allow re-loading when modelId changes.
-      class SmolVLM {
-        static model = null;
-        static processor = null;
-        static model_id = null;
-        static async getInstance(modelId, dtypeSettings, device) {
-          if(this.model_id !== modelId) {
-            // Reset if switching to a new model.
-            this.model = null;
-            this.processor = null;
-            this.model_id = modelId;
-          }
-          if (!this.processor) {
-            this.processor = await AutoProcessor.from_pretrained(modelId);
-          }
-          if (!this.model) {
-            this.model = await AutoModelForVision2Seq.from_pretrained(modelId, {
-              dtype: {
-                embed_tokens: dtypeSettings.embed,
-                vision_encoder: dtypeSettings.vision,
-                decoder_model_merged: dtypeSettings.decoder,
-              },
-              device: device,
-            });
-          }
-          return [this.processor, this.model];
+    <script type="module">
+      import {
+        AutoProcessor,
+        AutoModelForVision2Seq,
+        load_image,
+        TextStreamer,
+      } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.4.2";
+
+      class SmolVLM {
+        static model = null;
+        static processor = null;
+        static model_id = null;
+        static async getInstance(modelId, dtypeSettings, device) {
+          if (this.model_id !== modelId) {
+            this.model = null;
+            this.processor = null;
+            this.model_id = modelId;
           }
+          if (!this.processor) {
+            this.processor = await AutoProcessor.from_pretrained(modelId);
+          }
+          if (!this.model) {
+            this.model = await AutoModelForVision2Seq.from_pretrained(modelId, {
+              dtype: {
+                embed_tokens: dtypeSettings.embed,
+                vision_encoder: dtypeSettings.vision,
+                decoder_model_merged: dtypeSettings.decoder,
+              },
+              device: device,
+            });
+          }
+          return [this.processor, this.model];
         }
+      }
+
+      async function runBenchmark() {
+        document.getElementById("model-options").disabled = true;
+        document.getElementById("hardware-options").disabled = true;
+        const resultsDiv = document.getElementById("results");
+        resultsDiv.innerHTML = "";
+
+        const modelIds = [
+          "HuggingFaceTB/SmolVLM-256M-Instruct",
+          "HuggingFaceTB/SmolVLM-500M-Instruct",
+          "HuggingFaceTB/SmolVLM-Instruct"
+        ];
 
-      async function runBenchmark() {
-        // Disable controls during benchmark.
-        document.getElementById("model-options").disabled = true;
-        document.getElementById("hardware-options").disabled = true;
-        const resultsDiv = document.getElementById("results");
-        resultsDiv.innerHTML = "<p>Loading models and running benchmarks...</p>";
-
-        // Define the two SmolVLM model IDs to test.
-        const modelIds = [
-          "HuggingFaceTB/SmolVLM-256M-Instruct",
-          "HuggingFaceTB/SmolVLM-500M-Instruct"
-        ];
-
-        let overallResultsHtml = "";
-        // Get other settings from the page.
-        const decoder_dtype = document.getElementById("decoder-dtype").value || "q8";
-        const embed_dtype = document.getElementById("embed-dtype").value || "q8";
-        const vision_dtype = document.getElementById("vision-dtype").value || "q8";
-        const device = document.getElementById("device").value;
-        const imageUrl = document.getElementById("image-url").value;
-        const maxTokens = parseInt(document.getElementById("max-tokens").value) || 128;
-        const numRuns = parseInt(document.getElementById("num-runs").value) || 5;
-        const doImageSplitting = document.getElementById("do-split").checked;
-
-        const dtypeSettings = {
-          decoder: decoder_dtype,
-          embed: embed_dtype,
-          vision: vision_dtype,
-        };
-
-        // Load the benchmark image.
-        const image = await load_image(imageUrl);
-
-        // Loop through each model.
-        for (const modelId of modelIds) {
-          overallResultsHtml += `<div class="model-results"><h2>Benchmark Results for ${modelId}</h2>`;
-          // Pre-run warmup (compiling shaders, initialization) with max_new_tokens: 1.
-          try {
+        const decoder_dtype = document.getElementById("decoder-dtype").value || "fp32";
+        const embed_dtype = document.getElementById("embed-dtype").value || "fp32";
+        const vision_dtype = document.getElementById("vision-dtype").value || "fp32";
+        const device = document.getElementById("device").value;
+        const imageUrl = document.getElementById("image-url").value;
+        const maxTokens = parseInt(document.getElementById("max-tokens").value) || 128;
+        const numRuns = parseInt(document.getElementById("num-runs").value) || 5;
+        const doImageSplitting = document.getElementById("do-split").checked;
+
+        const dtypeSettings = { decoder: decoder_dtype, embed: embed_dtype, vision: vision_dtype };
+        const image = await load_image(imageUrl);
+
+        for (const modelId of modelIds) {
+          const modelShortName = modelId.split("/").pop();
+          const modelSection = document.createElement("div");
+          modelSection.className = "model-results";
+          modelSection.innerHTML = `<h2>Benchmarking ${modelShortName}</h2><p id="status-${modelShortName}">Loading...</p><pre id="bar-${modelShortName}">▯▯▯▯▯</pre>`;
+          resultsDiv.appendChild(modelSection);
+
+          const status = document.getElementById(`status-${modelShortName}`);
+          const bar = document.getElementById(`bar-${modelShortName}`);
+
+          try {
+            status.innerText = "Loading processor and model...";
+            const [processor, model] = await SmolVLM.getInstance(modelId, dtypeSettings, device);
+
+            status.innerText = "Warming up...";
+            const messages = [{
+              role: "user",
+              content: [
+                { type: "image" },
+                { type: "text", text: "Can you describe this image?" },
+              ],
+            }];
+            const text = processor.apply_chat_template(messages, { add_generation_prompt: true });
+            const inputs = await processor(text, [image], { do_image_splitting: doImageSplitting });
+            await model.generate({ ...inputs, max_new_tokens: 1 });
+
+            let totalTime = 0;
+            let totalTps = 0;
+            let runsResults = [];
+
+            for (let i = 0; i < numRuns; ++i) {
+              status.innerText = `Running benchmark... (${i + 1}/${numRuns})`;
+              bar.innerText = createProgressBar(i + 1, numRuns);
+              const start = performance.now();
               const [processor, model] = await SmolVLM.getInstance(modelId, dtypeSettings, device);
-            const messages = [{
-              role: "user",
-              content: [
-                { type: "image" },
-                { type: "text", text: "Can you describe this image?" },
-              ],
-            }];
               const text = processor.apply_chat_template(messages, { add_generation_prompt: true });
               const inputs = await processor(text, [image], { do_image_splitting: doImageSplitting });
+
+              let numTokens = 0;
+              let startTime;
+              let tps = 0;
+              const token_callback_function = () => {
+                startTime = startTime || performance.now();
+                tps = (numTokens++ / (performance.now() - startTime)) * 1000;
+              };
+              const streamer = new TextStreamer(processor.tokenizer, {
+                skip_prompt: true,
+                skip_special_tokens: true,
+                token_callback_function,
+              });
+
               await model.generate({
                 ...inputs,
-                max_new_tokens: 1,
+                max_new_tokens: maxTokens,
+                min_new_tokens: maxTokens,
+                streamer,
+              });
+
+              const elapsed = performance.now() - start;
+              totalTime += elapsed;
+              totalTps += tps;
+              runsResults.push({
+                run: i + 1,
+                time: elapsed.toFixed(2),
+                tps: tps.toFixed(2)
               });
-          } catch (e) {
-            overallResultsHtml += "<p>Error during warmup: " + e.toString() + "</p></div>";
-            continue;
-          }
-
-          // Benchmark runs using streaming generation.
-          let totalTime = 0;
-          let totalTps = 0;
-          let runsResults = [];
-          for (let i = 0; i < numRuns; ++i) {
-            try {
-              const [processor, model] = await SmolVLM.getInstance(modelId, dtypeSettings, device);
-              const messages = [{
-                role: "user",
-                content: [
-                  { type: "image" },
-                  { type: "text", text: "Can you describe this image?" },
-                ],
-              }];
-              const text = processor.apply_chat_template(messages, { add_generation_prompt: true });
-              const inputs = await processor(text, [image], { do_image_splitting: doImageSplitting });
-              let startTime, numTokens = 0, tps = 0;
-              const token_callback_function = () => {
-                startTime = startTime || performance.now();
-                tps = (numTokens++ / (performance.now() - startTime)) * 1000;
-              };
-              const streamer = new TextStreamer(processor.tokenizer, {
-                skip_prompt: true,
-                skip_special_tokens: true,
-                token_callback_function,
-              });
-              const generateStartTime = performance.now();
-              await model.generate({
-                ...inputs,
-                max_new_tokens: maxTokens,
-                min_new_tokens: maxTokens,
-                streamer,
-              });
-              const endTime = performance.now();
-              const elapsed = endTime - generateStartTime;
-              totalTime += elapsed;
-              totalTps += tps;
-              runsResults.push({
-                run: i + 1,
-                time: elapsed.toFixed(2),
-                tps: tps.toFixed(2)
-              });
-            } catch (e) {
-              runsResults.push({ run: i + 1, time: "Error", tps: "Error" });
-            }
             }
+
             const avgTime = (totalTime / numRuns).toFixed(2);
             const avgTps = (totalTps / numRuns).toFixed(2);
+            status.innerText = "✅ Done!";
+            bar.innerText = createProgressBar(numRuns, numRuns);
+
             let tableHtml = "<table>";
             tableHtml += "<tr><th>Run</th><th>Execution Time (ms)</th><th>Tokens per Second</th></tr>";
             runsResults.forEach(r => {
@@ -236,13 +226,23 @@
             });
             tableHtml += `<tr><td><strong>Average</strong></td><td><strong>${avgTime}</strong></td><td><strong>${avgTps}</strong></td></tr>`;
             tableHtml += "</table>";
-          overallResultsHtml += tableHtml + "</div>";
+            modelSection.innerHTML += tableHtml;
+
+          } catch (e) {
+            status.innerText = "❌ Error: " + e.toString();
           }
-        resultsDiv.innerHTML = overallResultsHtml;
         }
-
-        document.getElementById("start-benchmark").addEventListener("click", runBenchmark);
-      </script>
+      }
+
+      function createProgressBar(current, total) {
+        const filled = "▮".repeat(current);
+        const empty = "▯".repeat(total - current);
+        return filled + empty;
+      }
+
+      document.getElementById("start-benchmark").addEventListener("click", runBenchmark);
+    </script>
+
   </body>
 </html>
 
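For context when reading the hunk: the form controls the script queries are defined earlier in index.html and are untouched by this commit, so they never appear in the diff. The sketch below is a hypothetical reconstruction of that markup — only the element IDs are taken from the script's getElementById calls above; the control types, option values, labels, and the placeholder image URL are illustrative assumptions, not the file's actual markup.

<!-- Hypothetical scaffold: IDs match the script's lookups; everything else is assumed. -->
<fieldset id="model-options">
  <label>Decoder dtype
    <select id="decoder-dtype"><option value="fp32">fp32</option><option value="q8">q8</option></select>
  </label>
  <label>Embed dtype
    <select id="embed-dtype"><option value="fp32">fp32</option><option value="q8">q8</option></select>
  </label>
  <label>Vision dtype
    <select id="vision-dtype"><option value="fp32">fp32</option><option value="q8">q8</option></select>
  </label>
  <label>Image URL <input id="image-url" value="https://example.com/test.jpg"></label> <!-- placeholder URL -->
  <label>Max tokens <input id="max-tokens" type="number" value="128"></label>
  <label>Runs <input id="num-runs" type="number" value="5"></label>
  <label>Image splitting <input id="do-split" type="checkbox" checked></label>
</fieldset>
<fieldset id="hardware-options">
  <label>Device
    <select id="device"><option value="webgpu">webgpu</option><option value="wasm">wasm</option></select>
  </label>
</fieldset>
<button id="start-benchmark">Start benchmark</button>
<div id="results"></div>

If model-options and hardware-options really are container elements such as <fieldset>, as assumed here, the single .disabled = true assignment at the top of runBenchmark locks every nested control at once, since disabling a fieldset disables all of its form descendants. Note also that this commit changes the empty-value fallbacks for the three dtype selects from q8 to fp32, so the defaults chosen in a scaffold like this one affect which quantization a benchmark run actually measures.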