andito (HF Staff) committed (verified)
Commit 68bfa8d · Parent: d09721a

Update index.html

Files changed (1): index.html (+135 -135)
index.html CHANGED
@@ -87,148 +87,138 @@
 
     <div id="results"></div>
 
-    <script type="module">
-      import {
-        AutoProcessor,
-        AutoModelForVision2Seq,
-        load_image,
-        TextStreamer,
-      } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.4.2";
-
-      // Updated SmolVLM class to allow re-loading when modelId changes.
-      class SmolVLM {
-        static model = null;
-        static processor = null;
-        static model_id = null;
-        static async getInstance(modelId, dtypeSettings, device) {
-          if(this.model_id !== modelId) {
-            // Reset if switching to a new model.
-            this.model = null;
-            this.processor = null;
-            this.model_id = modelId;
-          }
-          if (!this.processor) {
-            this.processor = await AutoProcessor.from_pretrained(modelId);
-          }
-          if (!this.model) {
-            this.model = await AutoModelForVision2Seq.from_pretrained(modelId, {
-              dtype: {
-                embed_tokens: dtypeSettings.embed,
-                vision_encoder: dtypeSettings.vision,
-                decoder_model_merged: dtypeSettings.decoder,
-              },
-              device: device,
-            });
-          }
-          return [this.processor, this.model];
+    <script type="module">
+      import {
+        AutoProcessor,
+        AutoModelForVision2Seq,
+        load_image,
+        TextStreamer,
+      } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.4.2";
+
+      class SmolVLM {
+        static model = null;
+        static processor = null;
+        static model_id = null;
+        static async getInstance(modelId, dtypeSettings, device) {
+          if (this.model_id !== modelId) {
+            this.model = null;
+            this.processor = null;
+            this.model_id = modelId;
           }
+          if (!this.processor) {
+            this.processor = await AutoProcessor.from_pretrained(modelId);
+          }
+          if (!this.model) {
+            this.model = await AutoModelForVision2Seq.from_pretrained(modelId, {
+              dtype: {
+                embed_tokens: dtypeSettings.embed,
+                vision_encoder: dtypeSettings.vision,
+                decoder_model_merged: dtypeSettings.decoder,
+              },
+              device: device,
+            });
+          }
+          return [this.processor, this.model];
         }
+      }
+
+      async function runBenchmark() {
+        document.getElementById("model-options").disabled = true;
+        document.getElementById("hardware-options").disabled = true;
+        const resultsDiv = document.getElementById("results");
+        resultsDiv.innerHTML = "";
+
+        const modelIds = [
+          "HuggingFaceTB/SmolVLM-256M-Instruct",
+          "HuggingFaceTB/SmolVLM-500M-Instruct",
+          "HuggingFaceTB/SmolVLM-Instruct"
+        ];
 
-      async function runBenchmark() {
-        // Disable controls during benchmark.
-        document.getElementById("model-options").disabled = true;
-        document.getElementById("hardware-options").disabled = true;
-        const resultsDiv = document.getElementById("results");
-        resultsDiv.innerHTML = "<p>Loading models and running benchmarks...</p>";
-
-        // Define the two SmolVLM model IDs to test.
-        const modelIds = [
-          "HuggingFaceTB/SmolVLM-256M-Instruct",
-          "HuggingFaceTB/SmolVLM-500M-Instruct"
-        ];
-
-        let overallResultsHtml = "";
-        // Get other settings from the page.
-        const decoder_dtype = document.getElementById("decoder-dtype").value || "q8";
-        const embed_dtype = document.getElementById("embed-dtype").value || "q8";
-        const vision_dtype = document.getElementById("vision-dtype").value || "q8";
-        const device = document.getElementById("device").value;
-        const imageUrl = document.getElementById("image-url").value;
-        const maxTokens = parseInt(document.getElementById("max-tokens").value) || 128;
-        const numRuns = parseInt(document.getElementById("num-runs").value) || 5;
-        const doImageSplitting = document.getElementById("do-split").checked;
-
-        const dtypeSettings = {
-          decoder: decoder_dtype,
-          embed: embed_dtype,
-          vision: vision_dtype,
-        };
-
-        // Load the benchmark image.
-        const image = await load_image(imageUrl);
-
-        // Loop through each model.
-        for (const modelId of modelIds) {
-          overallResultsHtml += `<div class="model-results"><h2>Benchmark Results for ${modelId}</h2>`;
-          // Pre-run warmup (compiling shaders, initialization) with max_new_tokens: 1.
-          try {
+        const decoder_dtype = document.getElementById("decoder-dtype").value || "fp32";
+        const embed_dtype = document.getElementById("embed-dtype").value || "fp32";
+        const vision_dtype = document.getElementById("vision-dtype").value || "fp32";
+        const device = document.getElementById("device").value;
+        const imageUrl = document.getElementById("image-url").value;
+        const maxTokens = parseInt(document.getElementById("max-tokens").value) || 128;
+        const numRuns = parseInt(document.getElementById("num-runs").value) || 5;
+        const doImageSplitting = document.getElementById("do-split").checked;
+
+        const dtypeSettings = { decoder: decoder_dtype, embed: embed_dtype, vision: vision_dtype };
+        const image = await load_image(imageUrl);
+
+        for (const modelId of modelIds) {
+          const modelShortName = modelId.split("/").pop();
+          const modelSection = document.createElement("div");
+          modelSection.className = "model-results";
+          modelSection.innerHTML = `<h2>Benchmarking ${modelShortName}</h2><p id="status-${modelShortName}">Loading...</p><pre id="bar-${modelShortName}">▯▯▯▯▯</pre>`;
+          resultsDiv.appendChild(modelSection);
+
+          const status = document.getElementById(`status-${modelShortName}`);
+          const bar = document.getElementById(`bar-${modelShortName}`);
+
+          try {
+            status.innerText = "Loading processor and model...";
+            const [processor, model] = await SmolVLM.getInstance(modelId, dtypeSettings, device);
+
+            status.innerText = "Warming up...";
+            const messages = [{
+              role: "user",
+              content: [
+                { type: "image" },
+                { type: "text", text: "Can you describe this image?" },
+              ],
+            }];
+            const text = processor.apply_chat_template(messages, { add_generation_prompt: true });
+            const inputs = await processor(text, [image], { do_image_splitting: doImageSplitting });
+            await model.generate({ ...inputs, max_new_tokens: 1 });
+
+            let totalTime = 0;
+            let totalTps = 0;
+            let runsResults = [];
+
+            for (let i = 0; i < numRuns; ++i) {
+              status.innerText = `Running benchmark... (${i + 1}/${numRuns})`;
+              bar.innerText = createProgressBar(i + 1, numRuns);
+              const start = performance.now();
               const [processor, model] = await SmolVLM.getInstance(modelId, dtypeSettings, device);
-            const messages = [{
-              role: "user",
-              content: [
-                { type: "image" },
-                { type: "text", text: "Can you describe this image?" },
-              ],
-            }];
               const text = processor.apply_chat_template(messages, { add_generation_prompt: true });
               const inputs = await processor(text, [image], { do_image_splitting: doImageSplitting });
+
+              let numTokens = 0;
+              let startTime;
+              let tps = 0;
+              const token_callback_function = () => {
+                startTime = startTime || performance.now();
+                tps = (numTokens++ / (performance.now() - startTime)) * 1000;
+              };
+              const streamer = new TextStreamer(processor.tokenizer, {
+                skip_prompt: true,
+                skip_special_tokens: true,
+                token_callback_function,
+              });
+
               await model.generate({
                 ...inputs,
-                max_new_tokens: 1,
+                max_new_tokens: maxTokens,
+                min_new_tokens: maxTokens,
+                streamer,
+              });
+
+              const elapsed = performance.now() - start;
+              totalTime += elapsed;
+              totalTps += tps;
+              runsResults.push({
+                run: i + 1,
+                time: elapsed.toFixed(2),
+                tps: tps.toFixed(2)
               });
-          } catch (e) {
-            overallResultsHtml += "<p>Error during warmup: " + e.toString() + "</p></div>";
-            continue;
-          }
-
-          // Benchmark runs using streaming generation.
-          let totalTime = 0;
-          let totalTps = 0;
-          let runsResults = [];
-          for (let i = 0; i < numRuns; ++i) {
-            try {
-              const [processor, model] = await SmolVLM.getInstance(modelId, dtypeSettings, device);
-              const messages = [{
-                role: "user",
-                content: [
-                  { type: "image" },
-                  { type: "text", text: "Can you describe this image?" },
-                ],
-              }];
-              const text = processor.apply_chat_template(messages, { add_generation_prompt: true });
-              const inputs = await processor(text, [image], { do_image_splitting: doImageSplitting });
-              let startTime, numTokens = 0, tps = 0;
-              const token_callback_function = () => {
-                startTime = startTime || performance.now();
-                tps = (numTokens++ / (performance.now() - startTime)) * 1000;
-              };
-              const streamer = new TextStreamer(processor.tokenizer, {
-                skip_prompt: true,
-                skip_special_tokens: true,
-                token_callback_function,
-              });
-              const generateStartTime = performance.now();
-              await model.generate({
-                ...inputs,
-                max_new_tokens: maxTokens,
-                min_new_tokens: maxTokens,
-                streamer,
-              });
-              const endTime = performance.now();
-              const elapsed = endTime - generateStartTime;
-              totalTime += elapsed;
-              totalTps += tps;
-              runsResults.push({
-                run: i + 1,
-                time: elapsed.toFixed(2),
-                tps: tps.toFixed(2)
-              });
-            } catch (e) {
-              runsResults.push({ run: i + 1, time: "Error", tps: "Error" });
-            }
             }
+
             const avgTime = (totalTime / numRuns).toFixed(2);
             const avgTps = (totalTps / numRuns).toFixed(2);
+            status.innerText = "✅ Done!";
+            bar.innerText = createProgressBar(numRuns, numRuns);
+
             let tableHtml = "<table>";
             tableHtml += "<tr><th>Run</th><th>Execution Time (ms)</th><th>Tokens per Second</th></tr>";
             runsResults.forEach(r => {
@@ -236,13 +226,23 @@
             });
             tableHtml += `<tr><td><strong>Average</strong></td><td><strong>${avgTime}</strong></td><td><strong>${avgTps}</strong></td></tr>`;
             tableHtml += "</table>";
-          overallResultsHtml += tableHtml + "</div>";
+            modelSection.innerHTML += tableHtml;
+
+          } catch (e) {
+            status.innerText = "❌ Error: " + e.toString();
           }
-        resultsDiv.innerHTML = overallResultsHtml;
         }
-
-        document.getElementById("start-benchmark").addEventListener("click", runBenchmark);
-      </script>
+      }
+
+      function createProgressBar(current, total) {
+        const filled = "▮".repeat(current);
+        const empty = "▯".repeat(total - current);
+        return filled + empty;
+      }
+
+      document.getElementById("start-benchmark").addEventListener("click", runBenchmark);
+    </script>
+
   </body>
 </html>
 
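For context when reading the hunk: the form controls the script queries are defined earlier in index.html and are untouched by this commit, so they never appear in the diff. The sketch below is a hypothetical reconstruction of that markup — only the element IDs are taken from the script's getElementById calls above; the control types, option values, labels, and the placeholder image URL are illustrative assumptions, not the file's actual markup.

<!-- Hypothetical scaffold: IDs match the script's lookups; everything else is assumed. -->
<fieldset id="model-options">
  <label>Decoder dtype
    <select id="decoder-dtype"><option value="fp32">fp32</option><option value="q8">q8</option></select>
  </label>
  <label>Embed dtype
    <select id="embed-dtype"><option value="fp32">fp32</option><option value="q8">q8</option></select>
  </label>
  <label>Vision dtype
    <select id="vision-dtype"><option value="fp32">fp32</option><option value="q8">q8</option></select>
  </label>
  <label>Image URL <input id="image-url" value="https://example.com/test.jpg"></label> <!-- placeholder URL -->
  <label>Max tokens <input id="max-tokens" type="number" value="128"></label>
  <label>Runs <input id="num-runs" type="number" value="5"></label>
  <label>Image splitting <input id="do-split" type="checkbox" checked></label>
</fieldset>
<fieldset id="hardware-options">
  <label>Device
    <select id="device"><option value="webgpu">webgpu</option><option value="wasm">wasm</option></select>
  </label>
</fieldset>
<button id="start-benchmark">Start benchmark</button>
<div id="results"></div>

If model-options and hardware-options really are container elements such as <fieldset>, as assumed here, the single .disabled = true assignment at the top of runBenchmark locks every nested control at once, since disabling a fieldset disables all of its form descendants. Note also that this commit changes the empty-value fallbacks for the three dtype selects from q8 to fp32, so the defaults chosen in a scaffold like this one affect which quantization a benchmark run actually measures.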