JonnaMat's picture
Add Qwen3.5 Benchmarks (#2)
feca03d
// ─── Benchmark Viewer — config-driven from config.json + data.csv ─────────────
(async function () {
// ─── Theme-aware color helper ─────────────────────────────────────────────────
// Look up a CSS custom property on the document root element.
// `name` is the property name (e.g. "--teal"); the computed value is returned
// with surrounding whitespace stripped.
function cssVar(name) {
  const rootStyle = getComputedStyle(document.documentElement);
  const rawValue = rootStyle.getPropertyValue(name);
  return rawValue.trim();
}
// ─── Load config ──────────────────────────────────────────────────────────────
// Fetch the viewer configuration. An HTTP error is reported explicitly instead of
// handing the error page to the JSON parser, which would fail with a misleading message.
const config = await fetch("config.json").then(r => {
  if (!r.ok) throw new Error(`Failed to load config.json: HTTP ${r.status}`);
  return r.json();
});
// ─── Parse CSV ────────────────────────────────────────────────────────────────
// Parse simple comma-separated text into an array of row objects keyed by header name.
//
// - The first line supplies the column names (whitespace around each name is ignored).
// - Columns listed in config.metrics, or declared with type "number" in config.filters /
//   config.display_columns, are converted to numbers. Empty cells, "OOM" (any case) and
//   values that do not parse as a number become null, so downstream `=== null` checks
//   treat them uniformly instead of letting NaN leak into sorting and min/max.
// - All other columns keep their trimmed text ("" when the cell is empty or missing).
// - Blank lines are skipped.
//
// Cells are split on every comma; quoted fields containing commas are not supported.
function parseCSV(text) {
  const lines = text.replace(/\r/g, "").trim().split("\n");
  const headers = lines[0].split(",").map(h => h.trim());
  // Determine which columns are numeric (metrics + numeric filters + numeric display columns)
  const numericCols = new Set([
    ...config.metrics.map(m => m.column),
    ...config.filters.filter(f => f.type === "number").map(f => f.column),
    ...(config.display_columns || []).filter(d => d.type === "number").map(d => d.column),
  ]);
  return lines
    .slice(1)
    .filter(line => line.trim() !== "")
    .map(line => {
      const vals = line.split(",");
      const row = {};
      headers.forEach((h, i) => {
        const raw = (vals[i] || "").trim();
        if (!numericCols.has(h)) {
          row[h] = raw;
          return;
        }
        const isMissing = raw === "" || raw.toUpperCase() === "OOM";
        const num = isMissing ? NaN : parseFloat(raw);
        row[h] = Number.isNaN(num) ? null : num;
      });
      return row;
    });
}
// ─── Per-family data cache ────────────────────────────────────────────────────
// Parsed rows per family key, filled lazily by loadFamilyData.
const familyDataCache = {};
// Load and parse the CSV configured for `familyKey` (config.model_families[key].data_file).
// Returns the cached rows when the family was loaded before, and an empty array when the
// family has no data file configured. Throws when the server answers with an HTTP error,
// so an error page is never parsed (and cached) as if it were benchmark data.
async function loadFamilyData(familyKey) {
  if (familyDataCache[familyKey]) return familyDataCache[familyKey];
  const familyCfg = config.model_families?.[familyKey] || {};
  const dataFile = familyCfg.data_file;
  if (!dataFile) return [];
  const response = await fetch(dataFile);
  if (!response.ok) {
    throw new Error(`Failed to load ${dataFile}: HTTP ${response.status}`);
  }
  const rows = parseCSV(await response.text());
  familyDataCache[familyKey] = rows;
  return rows;
}
// Rows of the currently selected family; reassigned by switchFamily on every switch.
let DATA = [];
// ─── Config shortcuts ─────────────────────────────────────────────────────────
// Name of the row column that holds the model identifier.
const MODEL_COL = config.model_column;
// Optional row column naming each row's family; when empty, detectFamilies infers
// families from the model names instead.
const FAMILY_COL = config.model_family_column || "";
// Prepended to a model identifier to build the link shown in the tables.
const LINK_PREFIX = config.model_link_prefix || "";
// Models whose identifier starts with "<OPT_ORG>/" count as optimized; all others are external.
const OPT_ORG = config.optimized_org || "embedl";
// Global chart settings; a family entry may supply its own `chart` object instead.
const CHART_CFG = config.chart || {};
// Filter column used to group charts and tables: chart.group_by when set,
// otherwise the column of the last configured filter ("" when there are no filters).
const GROUP_BY = CHART_CFG.group_by || config.filters[config.filters.length - 1]?.column || "";
// True when `model` does not live under the optimized organisation's namespace,
// i.e. its identifier does not begin with "<OPT_ORG>/".
function isExternalModel(model) {
  const ownPrefix = `${OPT_ORG}/`;
  const isOwn = model.startsWith(ownPrefix);
  return !isOwn;
}
// ─── Derive unique values ─────────────────────────────────────────────────────
// Distinct model identifiers found in DATA; recomputed by switchFamily.
let ALL_MODELS = [];
// ─── Model Family Keys from config ────────────────────────────────────────────
// Family keys in the order they appear in config.model_families; the sidebar lists these.
const ALL_FAMILY_KEYS = Object.keys(config.model_families || {});
// Families detected in the loaded DATA, keyed by family name: { base, models }.
// Replaced with the result of detectFamilies() on every family switch.
let MODEL_FAMILIES = {};
// Detect families from currently loaded DATA
// Group the models of the currently loaded data into families.
// Returns an object mapping family key -> { base, models }.
//
// With a family column configured, each row's family value is the key and rows
// without one are ignored. Otherwise families are inferred from names: every
// external model founds a family named after its short name, and each optimized
// model joins the longest external short name that prefixes its own short name
// (or forms its own family when none does).
function detectFamilies() {
  const grouped = {};
  // Return the bucket for `key`, creating it on first use.
  const bucketFor = key => {
    if (!grouped[key]) grouped[key] = { base: key, models: [] };
    return grouped[key];
  };

  if (FAMILY_COL) {
    for (const row of DATA) {
      const familyKey = row[FAMILY_COL];
      if (!familyKey) continue;
      const modelId = row[MODEL_COL];
      const members = bucketFor(familyKey).models;
      if (!members.includes(modelId)) members.push(modelId);
    }
    return grouped;
  }

  // Longest names first so the most specific base wins the prefix match.
  const baseNames = ALL_MODELS
    .filter(isExternalModel)
    .map(m => m.split("/").pop())
    .sort((a, b) => b.length - a.length);
  for (const modelId of ALL_MODELS) {
    const shortName = modelId.split("/").pop();
    const key = isExternalModel(modelId)
      ? shortName
      : baseNames.find(base => shortName.startsWith(base)) || shortName;
    bucketFor(key).models.push(modelId);
  }
  return grouped;
}
// ─── Model colors & short labels ──────────────────────────────────────────────
// Convert a CSS hex colour to an "rgba(r,g,b,alpha)" string.
// Accepts "#rrggbb" as well as the shorthand "#rgb" form (each digit is doubled);
// any alpha digits in the hex value ("#rgba" / "#rrggbbaa") are ignored in favour
// of the `alpha` argument. The leading "#" is optional.
function hexToRgba(hex, alpha) {
  let digits = hex.startsWith("#") ? hex.slice(1) : hex;
  if (digits.length === 3 || digits.length === 4) {
    // Shorthand notation: "f80" means "ff8800".
    digits = [...digits].map(ch => ch + ch).join("");
  }
  const [r, g, b] = [0, 2, 4].map(i => parseInt(digits.slice(i, i + 2), 16));
  return `rgba(${r},${g},${b},${alpha})`;
}
// Build the bar colours from the current theme's CSS variables.
// Returns { palette, neutral }: `palette` holds five { bg, border } pairs for
// optimized models (four theme accents plus a fixed yellow), and `neutral` is the
// pair used for external models. `bg` is the translucent fill, `border` the solid colour.
function buildColorPalette() {
  const barAlpha = 0.75;
  const neutralAlpha = 0.45;
  const accents = ["--teal", "--green", "--pink", "--purple"].map(name => cssVar(name));
  const neutral = cssVar("--neutral");
  return {
    palette: [
      ...accents.map(color => ({ bg: hexToRgba(color, barAlpha), border: color })),
      { bg: `rgba(255,209,102,${barAlpha})`, border: "#ffd166" },
    ],
    neutral: { bg: hexToRgba(neutral, neutralAlpha), border: neutral },
  };
}
// Palette currently in effect; refreshed on every assignModelColors call.
let COLOR_PALETTE, NEUTRAL_COLOR;
// model id -> { bg, border } colour pair.
const MODEL_COLORS = {};
// model id -> short label shown on chart axes.
const MODEL_SHORT = {};
// Assign a colour and a short label to every model in MODEL_FAMILIES.
// External models share the neutral colour; optimized models cycle through the
// palette in encounter order. The short label is the model's short name with the
// family base (and one leading "-") removed; when nothing remains, external
// models are labelled "Original" and optimized ones keep their short name.
function assignModelColors() {
  const fresh = buildColorPalette();
  COLOR_PALETTE = fresh.palette;
  NEUTRAL_COLOR = fresh.neutral;
  let nextSlot = 0;
  for (const familyKey of Object.keys(MODEL_FAMILIES)) {
    const { base, models } = MODEL_FAMILIES[familyKey];
    for (const model of models) {
      const external = isExternalModel(model);
      if (external) {
        MODEL_COLORS[model] = NEUTRAL_COLOR;
      } else {
        MODEL_COLORS[model] = COLOR_PALETTE[nextSlot % COLOR_PALETTE.length];
        nextSlot += 1;
      }
      const name = model.split("/").pop();
      const suffix = name.slice(base.length).replace(/^-/, "");
      MODEL_SHORT[model] = suffix || (external ? "Original" : name);
    }
  }
}
// ─── Helpers ──────────────────────────────────────────────────────────────────
// A row counts as out-of-memory when every configured metric column is null.
function isOOMRow(row) {
  for (const metric of config.metrics) {
    if (row[metric.column] !== null) return false;
  }
  return true;
}
// Rows of DATA whose model belongs to the given family (none for an unknown family).
function familyRows(familyKey) {
  const family = MODEL_FAMILIES[familyKey];
  const members = new Set(family ? family.models : []);
  return DATA.filter(row => members.has(row[MODEL_COL]));
}
// For each configured filter, list the distinct non-empty values that occur in the
// family's rows. Values keep first-seen order, except that filters of type
// "number" are sorted ascending. Returns an object keyed by filter column.
function availableOptions(familyKey) {
  const rows = familyRows(familyKey);
  const isPresent = v => v !== "" && v !== null && v !== undefined;
  return config.filters.reduce((opts, filterCfg) => {
    const distinct = new Set();
    for (const row of rows) {
      const v = row[filterCfg.column];
      if (isPresent(v)) distinct.add(v);
    }
    const vals = [...distinct];
    if (filterCfg.type === "number") vals.sort((a, b) => a - b);
    opts[filterCfg.column] = vals;
    return opts;
  }, {});
}
// Resolve display label for a filter value
// Display label for a filter value: the configured value_labels entry when one
// exists (and is non-empty), otherwise the value itself, with the first letter
// upper-cased for strings and plain string conversion for everything else.
function valueLabel(filterCfg, val) {
  const custom = filterCfg.value_labels && filterCfg.value_labels[val];
  if (custom) return custom;
  return typeof val === "string"
    ? val.charAt(0).toUpperCase() + val.slice(1)
    : String(val);
}
// Sort models: external (original) first, then optimized
// Return a sorted copy of `models`: external (original) models first, then
// optimized ones, each group ordered by localeCompare. The input is not modified.
function sortModels(models) {
  const rank = m => (isExternalModel(m) ? 0 : 1);
  const sorted = models.slice();
  sorted.sort((left, right) => {
    const byOrigin = rank(left) - rank(right);
    return byOrigin !== 0 ? byOrigin : left.localeCompare(right);
  });
  return sorted;
}
// ─── Populate page from config ────────────────────────────────────────────────
// Hero
// Hero title: the last whitespace-separated word is wrapped in an accent span.
if (config.title) {
  const accentedTitle = config.title.replace(/^(.*?)(\s\S+)$/, '$1 <span class="accent">$2</span>');
  document.getElementById("hero-title").innerHTML = accentedTitle;
}
// Hero subtitle is inserted as plain text.
if (config.subtitle) {
  document.getElementById("hero-sub").textContent = config.subtitle;
}
// Sidebar: model families
const familyNav = document.getElementById("family-nav");
// Redraw the sidebar: one entry per configured family, with the currently
// selected family marked "active".
function renderSidebar() {
  const markup = [];
  for (const fk of ALL_FAMILY_KEYS) {
    const activeCls = fk === filters.family ? " active" : "";
    markup.push(`<div class="sidebar-item${activeCls}" data-family="${fk}">${fk}</div>`);
  }
  familyNav.innerHTML = markup.join("");
}
// Sidebar click: select the clicked family, refresh the highlight, then load
// that family's data and re-render. Clicks outside a sidebar item are ignored.
async function onFamilyNavClick(event) {
  const clicked = event.target.closest(".sidebar-item");
  if (!clicked) return;
  filters.family = clicked.dataset.family;
  renderSidebar();
  await switchFamily(filters.family);
}
familyNav.addEventListener("click", onFamilyNavClick);
// Build filter groups container dynamically (no family filter here)
// Start from an empty bar and add one button-group container per configured filter.
const filtersBar = document.getElementById("filters-bar");
filtersBar.innerHTML = "";
for (const { label, column } of config.filters) {
  filtersBar.appendChild(createFilterGroup(label, `filter-${column}`));
}
// Metric filter (always last)
filtersBar.appendChild(createFilterGroup("METRIC", "filter-metric"));
// Create a detached ".filter-group" element holding a caption and an empty
// ".btn-group" container with the given id; buttons are rendered into it later.
function createFilterGroup(label, id) {
  const group = document.createElement("div");
  group.className = "filter-group";
  group.innerHTML = [
    `<label>${label}</label>`,
    `<div class="btn-group" id="${id}"></div>`,
  ].join("");
  return group;
}
// Metric legend
// One legend entry per metric: short name (or column) followed by its
// description (or label).
const legendGrid = document.getElementById("legend-grid");
legendGrid.innerHTML = config.metrics
  .map(({ short, column, description, label }) => {
    const term = short || column;
    const detail = description || label;
    return `<div><strong>${term}</strong> ${detail}</div>`;
  })
  .join("");
// ─── State ────────────────────────────────────────────────────────────────────
// Current selection: the family, one entry per configured filter column
// (initially unset), and the metric plotted in the charts.
const filters = { family: ALL_FAMILY_KEYS[0] || "" };
for (const filterCfg of config.filters) {
  filters[filterCfg.column] = "";
}
filters.metric = CHART_CFG.default_metric || config.metrics[0]?.column || "";
// ─── Render button groups ─────────────────────────────────────────────────────
// Replace the contents of `container` with one button per { value, label } item.
// The button whose value equals `activeValue` (compared as strings) is marked "active".
function renderBtnGroup(container, items, activeValue) {
  const activeKey = String(activeValue);
  let markup = "";
  for (const { value, label } of items) {
    const activeCls = String(value) === activeKey ? " active" : "";
    markup += `<button class="btn${activeCls}" data-value="${value}">${label}</button>`;
  }
  container.innerHTML = markup;
}
// Draw the sidebar and the metric buttons, then fill the data-dependent filters.
// The metric selector is hidden when there is at most one metric or when the
// group-by filter is set to "all".
function populateFilters() {
  renderSidebar();
  // Metric buttons
  const metricEl = document.getElementById("filter-metric");
  const metricButtons = config.metrics.map(m => ({ value: m.column, label: m.short || m.column }));
  renderBtnGroup(metricEl, metricButtons, filters.metric);
  const hideMetric = config.metrics.length <= 1 || filters[GROUP_BY] === "all";
  metricEl.closest(".filter-group").style.display = hideMetric ? "none" : "";
  updateDependentFilters();
}
// Re-render every filter's buttons from the values available in the selected family.
// - Values follow the key order of the filter's value_labels when it defines any.
// - A selection that is no longer available falls back to the first value ("" if none).
// - The group-by filter gets an extra leading "All" button.
// - A filter group is hidden when it offers at most one real choice.
function updateDependentFilters() {
  const opts = availableOptions(filters.family);
  for (const f of config.filters) {
    const { column } = f;
    let vals = opts[column] || [];
    // Sort by value_labels key order if defined
    if (f.value_labels) {
      const labelOrder = Object.keys(f.value_labels);
      const rank = v => {
        const idx = labelOrder.indexOf(String(v));
        return idx === -1 ? Infinity : idx;
      };
      vals = [...vals].sort((a, b) => rank(a) - rank(b));
    }
    const current = String(filters[column]);
    if (!vals.some(v => String(v) === current)) {
      filters[column] = vals[0] ?? "";
    }
    const isGroupBy = column === GROUP_BY;
    const items = vals.map(v => ({ value: String(v), label: valueLabel(f, v) }));
    // For the group_by filter, add "All" option
    if (isGroupBy) items.unshift({ value: "all", label: "All" });
    const el = document.getElementById("filter-" + column);
    if (!el) continue;
    renderBtnGroup(el, items, String(filters[column]));
    // Hide the entire filter group if only one effective choice
    const effectiveCount = isGroupBy ? items.length - 1 : items.length;
    el.closest(".filter-group").style.display = effectiveCount <= 1 ? "none" : "";
  }
}
// ─── Event binding ────────────────────────────────────────────────────────────
// Filter click: move the "active" mark to the clicked button, store its value
// under the filter key taken from the group's id, and re-render.
filtersBar.addEventListener("click", event => {
  const clicked = event.target.closest(".btn");
  if (!clicked) return;
  const group = clicked.closest(".btn-group");
  for (const sibling of group.querySelectorAll(".btn")) {
    sibling.classList.remove("active");
  }
  clicked.classList.add("active");
  const filterKey = group.id.replace("filter-", "");
  filters[filterKey] = clicked.dataset.value;
  render();
});
// ─── Chart ────────────────────────────────────────────────────────────────────
// Chart.js instances currently on the page; kept so they can be destroyed on re-render.
let charts = [];
// Rebuild the bar chart(s) for the selected group value from the already-filtered rows.
//
// Nothing is drawn when the group filter is "all", when no row matches the group,
// or when only a single model would be shown. One chart is drawn per configured
// scenario (family chart config overrides the global one); without scenarios a
// single chart covers all rows. Each model contributes one bar, taken from its
// first matching row; a model with no measurement is drawn as a zero-height bar
// (minBarLength keeps it visible) and reported as "OOM" in the tooltip.
function buildChart(filtered) {
  // Release the previous Chart.js instances before their canvases are removed.
  charts.forEach(c => c.destroy());
  charts = [];
  const section = document.getElementById("charts-section");
  section.innerHTML = "";
  const familyCfg = config.model_families?.[filters.family] || {};
  const chartCfg = familyCfg.chart || CHART_CFG;
  const scenarios = chartCfg.scenarios || [];
  const metricCol = filters.metric;
  const metricCfg = config.metrics.find(m => m.column === metricCol) || {};
  const groupFilterCfg = config.filters.find(f => f.column === GROUP_BY);
  const groupVal = filters[GROUP_BY];
  if (groupVal === "all") return;
  const groupLabel = groupFilterCfg?.value_labels?.[groupVal] || String(groupVal);
  const gRows = filtered.filter(r => String(r[GROUP_BY]) === String(groupVal));
  if (!gRows.length) return;
  // Don't show chart when there is only a single model
  const uniqueModels = new Set(gRows.map(r => r[MODEL_COL]));
  if (uniqueModels.size <= 1) return;
  // If no scenarios configured, show one chart with all rows
  const scenarioList = scenarios.length
    ? scenarios
    : [{ label: "", match: {} }];
  scenarioList.forEach(scenario => {
    const matchEntries = Object.entries(scenario.match || {});
    // Filter rows matching this scenario
    const matchRows = gRows.filter(r =>
      matchEntries.every(([col, val]) => String(r[col]) === String(val))
    );
    // For models with only OOM rows (no scenario match), include one OOM row
    // but only if it matches the active filter values (e.g. type)
    const matchedModels = new Set(matchRows.map(r => r[MODEL_COL]));
    const oomRows = gRows.filter(r => !matchedModels.has(r[MODEL_COL]) && isOOMRow(r)
      && matchEntries.every(([col, val]) =>
        r[col] === null || r[col] === "" || r[col] === "OOM" || String(r[col]) === String(val)
      )
    );
    const allRows = matchRows.concat(oomRows);
    const models = sortModels([...new Set(allRows.map(r => r[MODEL_COL]))]);
    // One row per model: the first one found.
    const picked = models.map(m => allRows.find(r => r[MODEL_COL] === m)).filter(Boolean);
    if (!picked.length) return;
    const labels = picked.map(r => MODEL_SHORT[r[MODEL_COL]]);
    // Missing measurements (null or an absent column) are plotted as 0.
    const data = picked.map(r => r[metricCol] ?? 0);
    const bgColors = picked.map(r => MODEL_COLORS[r[MODEL_COL]].bg);
    const borderColors = picked.map(r => MODEL_COLORS[r[MODEL_COL]].border);
    const metricHint = metricCfg.higher_is_better ? " (higher is better)" : " (lower is better)";
    const yLabel = (metricCfg.label || metricCol) + metricHint;
    const chartBlock = document.createElement("div");
    chartBlock.className = "chart-block";
    const heading = document.createElement("h3");
    heading.className = "chart-heading";
    heading.textContent = groupLabel;
    chartBlock.appendChild(heading);
    if (scenario.label) {
      const sub = document.createElement("p");
      sub.className = "chart-subtitle";
      sub.textContent = scenario.label;
      chartBlock.appendChild(sub);
    }
    const wrap = document.createElement("div");
    wrap.className = "chart-wrap";
    const canvas = document.createElement("canvas");
    wrap.appendChild(canvas);
    chartBlock.appendChild(wrap);
    section.appendChild(chartBlock);
    const c = new Chart(canvas, {
      type: "bar",
      data: {
        labels,
        datasets: [{
          data,
          backgroundColor: bgColors,
          borderColor: borderColors,
          borderWidth: 2, borderRadius: 6, minBarLength: 4,
        }],
      },
      options: {
        responsive: true, maintainAspectRatio: false,
        plugins: {
          legend: { display: false },
          title: { display: false },
          tooltip: {
            backgroundColor: cssVar("--tooltip-bg"), titleColor: cssVar("--tooltip-text"), bodyColor: cssVar("--tooltip-body"),
            borderColor: cssVar("--btn-active-border"), borderWidth: 1,
            callbacks: {
              label: ctx => {
                // Show the real value, not the 0 substituted for plotting.
                const orig = picked[ctx.dataIndex]?.[metricCol];
                return orig == null ? "OOM" : orig.toLocaleString();
              },
            },
          },
        },
        scales: {
          y: { beginAtZero: true, title: { display: true, text: yLabel, color: cssVar("--text-muted") }, grid: { color: cssVar("--border") }, ticks: { color: cssVar("--text-dim") } },
          x: { grid: { display: false }, ticks: { color: cssVar("--text-muted"), font: { size: 14 } } },
        },
      },
    });
    charts.push(c);
  });
}
// ─── Tables ───────────────────────────────────────────────────────────────────
// Rebuild the result tables from the already-filtered rows: one table card per
// group value (all available values when the group filter is "all").
//
// Columns are: model, the display columns whose `visible_when` condition holds
// for the current filters, then every metric. Rows are ordered by the family's
// (or global) table_sort rules; with table_group_by configured, a visual break is
// inserted between sub-groups and the best metric value is highlighted per
// sub-group. `chartsShown` suppresses the card heading, because the chart above
// already carries it.
function buildTables(filtered, chartsShown) {
  const section = document.getElementById("tables-section");
  section.innerHTML = "";
  const groupFilterCfg = config.filters.find(f => f.column === GROUP_BY);
  const groupVal = filters[GROUP_BY];
  const opts = availableOptions(filters.family);
  let groupVals = groupVal === "all" ? (opts[GROUP_BY] || []) : [groupVal];
  if (groupVal === "all" && groupFilterCfg?.value_labels) {
    const labelOrder = Object.keys(groupFilterCfg.value_labels);
    groupVals = [...groupVals].sort((a, b) => {
      const ai = labelOrder.indexOf(String(a));
      const bi = labelOrder.indexOf(String(b));
      return (ai === -1 ? Infinity : ai) - (bi === -1 ? Infinity : bi);
    });
  }
  // Determine which display columns are visible given current filter state.
  // Compared as strings: a selection is a number when defaulted from the data but a
  // string once a button was clicked, and the configured values may be either.
  const visibleDisplay = (config.display_columns || []).filter(dc => {
    if (!dc.visible_when) return true;
    return Object.entries(dc.visible_when).every(([filterCol, allowedVals]) =>
      allowedVals.map(String).includes(String(filters[filterCol]))
    );
  });
  // Build column list: Model + visible display cols + metrics
  const colDefs = [
    { key: MODEL_COL, label: "MODEL", isModel: true },
    ...visibleDisplay.map(dc => ({ key: dc.column, label: dc.label, description: dc.description || "" })),
    ...config.metrics.map(m => ({ key: m.column, label: m.short || m.column, isMetric: true, description: m.description || "" })),
  ];
  const firstMetricIdx = colDefs.findIndex(c => c.isMetric);
  // Resolve table_sort: family-specific overrides global
  const familyCfg = config.model_families?.[filters.family] || {};
  const sortRules = familyCfg.table_sort || config.table_sort || [];
  const tableGroupBy = familyCfg.table_group_by || config.table_group_by || "";
  const tableGroupCols = Array.isArray(tableGroupBy) ? tableGroupBy : (tableGroupBy ? [tableGroupBy] : []);
  // Key identifying a row's sub-group; a single shared key when no grouping is configured.
  const groupRowKey = r => tableGroupCols.length
    ? tableGroupCols.map(c => String(r[c] ?? "")).join("\0")
    : "__all__";
  groupVals.forEach(gv => {
    const rows = filtered.filter(r => String(r[GROUP_BY]) === String(gv));
    if (!rows.length) return;
    rows.sort((a, b) => {
      for (const rule of sortRules) {
        const col = rule.column;
        const mul = rule.direction === "desc" ? -1 : 1;
        if (rule.external_first && col === MODEL_COL) {
          const aExt = isExternalModel(a[col]) ? 0 : 1;
          const bExt = isExternalModel(b[col]) ? 0 : 1;
          if (aExt !== bExt) return (aExt - bExt) * mul;
        }
        const av = a[col], bv = b[col];
        if (av === bv || (av == null && bv == null)) continue;
        // Missing values always sort last, regardless of direction.
        if (av == null) return 1;
        if (bv == null) return -1;
        if (typeof av === "number" && typeof bv === "number") {
          if (av !== bv) return (av - bv) * mul;
        } else {
          // Prefer a numeric comparison when both values start with a number.
          const aNum = parseFloat(String(av));
          const bNum = parseFloat(String(bv));
          if (!isNaN(aNum) && !isNaN(bNum)) {
            if (aNum !== bNum) return (aNum - bNum) * mul;
          }
          const cmp = String(av).localeCompare(String(bv));
          if (cmp !== 0) return cmp * mul;
        }
      }
      return 0;
    });
    // Track row group for break detection
    let prevGroupVal = undefined;
    const card = document.createElement("div");
    card.className = "table-card";
    const heading = groupFilterCfg?.value_labels?.[gv] || String(gv);
    let html = chartsShown ? '' : `<h3>${heading}</h3>`;
    html += `<div class="table-scroll"><table><thead><tr>`;
    html += colDefs.map((c, i) => {
      const tip = c.description ? ` data-tip="${c.description.replace(/"/g, '&quot;')}"` : '';
      const cls = i === firstMetricIdx ? ' class="first-metric metric-cell"' : (c.isMetric ? ' class="metric-cell"' : '');
      return `<th${tip}${cls}>${c.label}</th>`;
    }).join("");
    html += `</tr></thead><tbody>`;
    // Compute best metric value per sub-group (tableGroupBy) per column
    const bestByGroup = {};
    const subGroups = tableGroupCols.length
      ? [...new Set(rows.map(groupRowKey))]
      : ["__all__"];
    subGroups.forEach(sg => {
      const groupRows = tableGroupCols.length ? rows.filter(r => groupRowKey(r) === sg) : rows;
      bestByGroup[sg] = {};
      colDefs.filter(c => c.isMetric).forEach(c => {
        const metricCfg = config.metrics.find(m => m.column === c.key);
        const vals = groupRows.map(r => r[c.key]).filter(v => v !== null && v !== undefined);
        if (vals.length) {
          bestByGroup[sg][c.key] = metricCfg?.higher_is_better ? Math.max(...vals) : Math.min(...vals);
        }
      });
    });
    rows.forEach(r => {
      const oom = isOOMRow(r);
      let rowClass = "";
      if (tableGroupCols.length) {
        const curVal = groupRowKey(r);
        if (prevGroupVal !== undefined && curVal !== prevGroupVal) {
          rowClass = "row-group-break";
        }
        prevGroupVal = curVal;
      }
      html += `<tr class="${rowClass}">`;
      colDefs.forEach((c, i) => {
        const val = r[c.key];
        const fmCls = i === firstMetricIdx ? ' class="first-metric metric-cell"' : ' class="metric-cell"';
        if (c.isModel) {
          const hfUrl = LINK_PREFIX + val;
          const modelColor = MODEL_COLORS[val]?.border || '#888';
          html += `<td class="model-cell"><span class="model-dot" style="background:${modelColor}"></span><a href="${hfUrl}" target="_blank" rel="noopener" style="color:${modelColor}">${val}</a></td>`;
        } else if (oom) {
          // A row without any measurement shows OOM in every non-model column.
          html += `<td${c.isMetric ? fmCls : ''}><span class="oom">OOM</span></td>`;
        } else if (c.isMetric) {
          const sg = groupRowKey(r);
          const isBest = val !== null && val !== undefined && val === bestByGroup[sg]?.[c.key];
          const display = val === null ? '<span class="oom">OOM</span>' : (typeof val === "number" ? val.toFixed(2) : (val ?? "—"));
          html += `<td${fmCls}>${isBest ? '<strong style="color: white; opacity: 0.7">' + display + '</strong>' : display}</td>`;
        } else {
          // Only genuinely missing values get the placeholder; a numeric 0 is real data.
          const isBlank = val === null || val === undefined || val === "" || Number.isNaN(val);
          html += `<td>${isBlank ? "—" : val}</td>`;
        }
      });
      html += "</tr>";
    });
    html += "</tbody></table></div>";
    card.innerHTML = html;
    section.appendChild(card);
  });
}
// ─── Experiment Setup ─────────────────────────────────────────────────────────
// Show the experiment-setup note configured for the selected family and group
// value. The section is emptied first and hidden when the group filter is "all"
// or when no note exists for the selected value.
function buildExperimentSetup() {
  const section = document.getElementById("experiment-setup");
  section.innerHTML = "";
  const setupMap = (config.model_families?.[filters.family] || {}).experiment_setup || {};
  const groupVal = filters[GROUP_BY];
  const text = groupVal === "all" ? undefined : setupMap[groupVal];
  if (!text) {
    section.style.display = "none";
    return;
  }
  section.style.display = "";
  const note = document.createElement("p");
  note.textContent = text;
  section.appendChild(note);
}
// ─── Render ───────────────────────────────────────────────────────────────────
// Re-render charts, tables and the setup note for the current selection.
// Rows are limited to the selected family's models (all models when the family
// is unknown) and to every filter that has a concrete value; filters set to
// "all", "" or undefined do not restrict anything.
function render() {
  const family = MODEL_FAMILIES[filters.family];
  const familyModels = new Set(family ? family.models : ALL_MODELS);
  const matchesFilters = row => config.filters.every(f => {
    const wanted = filters[f.column];
    if (wanted === "all" || wanted === "" || wanted === undefined) return true;
    return String(row[f.column]) === String(wanted);
  });
  const filtered = DATA.filter(row => familyModels.has(row[MODEL_COL]) && matchesFilters(row));
  buildChart(filtered);
  const chartsShown = charts.length > 0;
  // Toggle metric selector visibility
  const metricEl = document.getElementById("filter-metric");
  if (metricEl) {
    const hideMetric = config.metrics.length <= 1 || !chartsShown;
    metricEl.closest(".filter-group").style.display = hideMetric ? "none" : "";
  }
  buildTables(filtered, chartsShown);
  buildExperimentSetup();
}
// ─── Switch Family (load data + re-render) ────────────────────────────────────
// Load the given family's data, rebuild everything derived from it (model list,
// families, colours, filter options) and re-render.
// If the selection changed while the data was loading, the result is dropped:
// otherwise a slow earlier request could finish last and replace the data of the
// family the user is now looking at.
async function switchFamily(familyKey) {
  const rows = await loadFamilyData(familyKey);
  if (familyKey !== filters.family) return;
  DATA = rows;
  ALL_MODELS = [...new Set(DATA.map(r => r[MODEL_COL]))];
  MODEL_FAMILIES = detectFamilies();
  assignModelColors();
  updateDependentFilters();
  render();
}
// ─── Init ─────────────────────────────────────────────────────────────────────
// Draw the sidebar and metric buttons once up front (no family data is loaded yet),
// then load the initially selected family, which fills the data-dependent filters
// and performs the first render.
populateFilters();
await switchFamily(filters.family);
})();