Spaces:

embedl
/

Edge-Inference-Benchmarks

Running

Jonna Marie Matthiesen Copilot committed 17 days ago

Commit

19a0613

1 Parent(s): 2d4b81f

Add accuracy table above filters for model families with accuracy data

Add accuracy_file config key per model family pointing to a CSV with
accuracy benchmarks. When present, an accuracy table renders above the
filter buttons showing per-model scores with best values highlighted.
The table respects variant selection, filtering to active models.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

Files changed (3) hide show

app.js +86 -0
config.json +2 -1
index.html +3 -0

app.js CHANGED Viewed

@@ -59,6 +59,38 @@ async function loadFamilyData(familyKey) {
 // Current family's loaded data
 let DATA = [];
 // ─── Config shortcuts ─────────────────────────────────────────────────────────
 const MODEL_COL     = config.model_column;
@@ -705,6 +737,59 @@ function buildExperimentSetup() {
     });
 }
 // ─── Render ───────────────────────────────────────────────────────────────────
 function render() {
@@ -729,6 +814,7 @@ function render() {
             (config.metrics.length <= 1 || !chartsShown) ? "none" : "";
     }
     buildTables(filtered, chartsShown);
     buildExperimentSetup();
 }

 // Current family's loaded data
 let DATA = [];
+// ─── Accuracy data cache ──────────────────────────────────────────────────────
// Parsed accuracy CSVs keyed by file path, so each file is fetched and parsed once.
const accDataCache = {};

/**
 * Fetch an accuracy CSV and parse it into header names and row objects.
 *
 * Parsing is a plain comma split: the first non-blank line is the header row and
 * every following non-blank line becomes one row object keyed by header name.
 * Quoted fields containing commas are NOT supported. Header names and cell
 * values are trimmed; cells missing from a short row become "".
 * Successful results are cached per path; failures are not cached, so a later
 * call retries the fetch.
 *
 * @param {string} filePath - Path/URL of the CSV file. A falsy value yields null.
 * @returns {Promise<{headers: string[], rows: Object<string, string>[]}|null>}
 *     Parsed data, or null when no path was given, the request failed, or the
 *     file contained no non-blank lines.
 */
async function loadAccuracyData(filePath) {
    if (!filePath) return null;
    if (accDataCache[filePath]) return accDataCache[filePath];
    try {
        const response = await fetch(filePath);
        if (!response.ok) {
            console.warn(`Accuracy file "${filePath}" could not be loaded (HTTP ${response.status}).`);
            return null;
        }
        const text = await response.text();
        // Strip a leading BOM and CRs so the first header and the last cell of
        // each line match cleanly; drop blank lines so they do not become rows.
        const lines = text
            .replace(/^\uFEFF/, "")
            .replace(/\r/g, "")
            .split("\n")
            .filter(line => line.trim() !== "");
        if (!lines.length) return null;
        const headers = lines[0].split(",").map(h => h.trim());
        const rows = lines.slice(1).map(line => {
            const vals = line.split(",");
            return Object.fromEntries(headers.map((h, i) => [h, (vals[i] ?? "").trim()]));
        });
        const result = { headers, rows };
        accDataCache[filePath] = result;
        return result;
    } catch (err) {
        // Best effort: the accuracy table is optional, so report and carry on.
        console.warn(`Accuracy file "${filePath}" could not be loaded:`, err);
        return null;
    }
}
 // ─── Config shortcuts ─────────────────────────────────────────────────────────
 const MODEL_COL     = config.model_column;
     });
 }
+// ─── Accuracy Table ───────────────────────────────────────────────────────────
// Sequence number of the most recent buildAccuracyTable() call. A call that is
// still awaiting its CSV compares against this to detect it has been superseded.
let accuracyRenderSeq = 0;

/**
 * Escape a value for interpolation into HTML text or a double-quoted attribute.
 * @param {*} value - Value to escape; converted with String().
 * @returns {string} The escaped text.
 */
function escapeAccuracyHtml(value) {
    return String(value)
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#39;");
}

/**
 * Render the accuracy table for the active model family into #accuracy-section.
 *
 * The section is always cleared first. Nothing is rendered when the section
 * element is missing, the family has no `accuracy_file`, the file cannot be
 * loaded, or none of its rows belong to the active model set. The first CSV
 * column is the model name; the remaining columns are metrics, and the highest
 * numeric value in each metric column is emphasised.
 *
 * Callers do not await this function, so calls can overlap; only the most
 * recent call is allowed to append its table.
 *
 * @returns {Promise<void>}
 */
async function buildAccuracyTable() {
    const renderSeq = ++accuracyRenderSeq;
    const section = document.getElementById("accuracy-section");
    if (!section) return;
    section.innerHTML = "";
    const familyCfg = config.model_families?.[activeFamilyKey()] || {};
    const accFile = familyCfg.accuracy_file;
    if (!accFile) return;
    const accData = await loadAccuracyData(accFile);
    // A newer call started while the CSV was loading; it owns the section now.
    if (renderSeq !== accuracyRenderSeq) return;
    if (!accData || !accData.rows.length) return;
    const [modelCol, ...metricCols] = accData.headers;
    // Filter to active models if a variant is selected
    const activeModels = getActiveModelSet();
    const rows = accData.rows.filter(r => activeModels.has(r[modelCol]));
    if (!rows.length) return;
    // Find best value per column (higher is better for accuracy)
    const best = {};
    metricCols.forEach(col => {
        const vals = rows.map(r => parseFloat(r[col])).filter(v => !Number.isNaN(v));
        if (vals.length) best[col] = Math.max(...vals);
    });
    // All CSV-derived text is escaped before it is placed into the markup.
    let html = `<h3>Accuracy</h3><div class="table-scroll"><table><thead><tr>`;
    html += `<th>MODEL</th>`;
    html += metricCols.map(h => `<th class="metric-cell">${escapeAccuracyHtml(h)}</th>`).join("");
    html += `</tr></thead><tbody>`;
    rows.forEach(r => {
        const model = r[modelCol];
        const modelColor = escapeAccuracyHtml(MODEL_COLORS[model]?.border || '#888');
        const modelHref = escapeAccuracyHtml(`${LINK_PREFIX}${model}`);
        const modelLabel = escapeAccuracyHtml(model);
        html += `<tr><td class="model-cell"><span class="model-dot" style="background:${modelColor}"></span><a href="${modelHref}" target="_blank" rel="noopener" style="color:${modelColor}">${modelLabel}</a></td>`;
        metricCols.forEach(col => {
            const val = parseFloat(r[col]);
            const isNumeric = !Number.isNaN(val);
            const isBest = isNumeric && val === best[col];
            // Non-numeric cells show their raw text, or a dash when empty.
            const display = escapeAccuracyHtml(isNumeric ? val.toFixed(2) : (r[col] || "—"));
            const cell = isBest
                ? `<strong style="color: white; opacity: 0.7">${display}</strong>`
                : display;
            html += `<td class="metric-cell">${cell}</td>`;
        });
        html += `</tr>`;
    });
    html += `</tbody></table></div>`;
    const card = document.createElement("div");
    card.className = "table-card";
    card.innerHTML = html;
    section.appendChild(card);
}
 // ─── Render ───────────────────────────────────────────────────────────────────
 function render() {
             (config.metrics.length <= 1 || !chartsShown) ? "none" : "";
     }
     buildTables(filtered, chartsShown);
+    buildAccuracyTable();
     buildExperimentSetup();
 }

config.json CHANGED Viewed

@@ -138,7 +138,8 @@
         "agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.14.0 tegra, 256 tokens generated, 10 warm-up runs, averaged over 25 runs.",
         "orin_nano": "Measurement setup: NVIDIA AI IoT vLLM 0.14.0 tegra, 256 tokens generated, 10 warm-up runs, averaged over 25 runs."
       },
-      "default_device": "orin_nano"
     },
     "Qwen3.5": {
       "data_file": "data/Qwen3.5.csv",

         "agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.14.0 tegra, 256 tokens generated, 10 warm-up runs, averaged over 25 runs.",
         "orin_nano": "Measurement setup: NVIDIA AI IoT vLLM 0.14.0 tegra, 256 tokens generated, 10 warm-up runs, averaged over 25 runs."
       },
+      "default_device": "orin_nano",
+      "accuracy_file": "data/acc-Cosmos-Reason2.csv"
     },
     "Qwen3.5": {
       "data_file": "data/Qwen3.5.csv",

index.html CHANGED Viewed

@@ -28,6 +28,9 @@
                 <p class="hero-sub" id="hero-sub">Compare throughput and latency across devices and model variants.</p>
             </header>
             <!-- Filters -->
             <section class="filters-bar" id="filters-bar"></section>

                 <p class="hero-sub" id="hero-sub">Compare throughput and latency across devices and model variants.</p>
             </header>
+            <!-- Accuracy Table -->
+            <section id="accuracy-section"></section>
             <!-- Filters -->
             <section class="filters-bar" id="filters-bar"></section>