|
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <!-- Required for the max-width media queries in the stylesheet to apply on mobile devices. -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Live WebApp Viewer</title>
|
<style> |
|
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800;900&display=swap'); |
|
|
|
:root { |
|
--primary-color: #0ea5e9; |
|
--primary-dark: #0c4a6e; |
|
--primary-light: #f0f9ff; |
|
--secondary-color: #059669; |
|
--secondary-dark: #064e3b; |
|
--secondary-light: #ecfdf5; |
|
--accent-color: #7c3aed; |
|
--accent-dark: #4c1d95; |
|
--accent-light: #f3f4f6; |
|
--dark-bg: #0a0a0a; |
|
--card-bg: #ffffff; |
|
--text-primary: #1a1a1a; |
|
--text-secondary: #666666; |
|
--text-muted: #888888; |
|
--border-color: #e5e7eb; |
|
--shadow-sm: 0 1px 3px rgba(0,0,0,0.12), 0 1px 2px rgba(0,0,0,0.24); |
|
--shadow-md: 0 4px 6px rgba(0,0,0,0.07), 0 1px 3px rgba(0,0,0,0.06); |
|
--shadow-lg: 0 10px 25px rgba(0,0,0,0.1), 0 4px 10px rgba(0,0,0,0.06); |
|
--shadow-xl: 0 20px 40px rgba(0,0,0,0.1), 0 8px 20px rgba(0,0,0,0.08); |
|
--border-radius: 8px; |
|
--border-radius-sm: 6px; |
|
--border-radius-lg: 12px; |
|
} |
|
|
|
* { |
|
box-sizing: border-box; |
|
} |
|
|
|
body { |
|
margin: 0; |
|
font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; |
|
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%); |
|
color: var(--text-primary); |
|
line-height: 1.6; |
|
-webkit-font-smoothing: antialiased; |
|
-moz-osx-font-smoothing: grayscale; |
|
} |
|
|
|
/* Page wrapper: horizontally centered and capped at 1400px. */
.container {
  /* 100% instead of 100vw: viewport units include the vertical scrollbar's
     width, so 100vw overflows horizontally whenever that scrollbar is shown. */
  width: 100%;
  max-width: 1400px;
  margin: 0 auto;
  padding: 20px;
  box-sizing: border-box;
}
|
|
|
.loading { |
|
text-align: center; |
|
padding: 80px 20px; |
|
font-size: 18px; |
|
color: var(--text-secondary); |
|
font-weight: 500; |
|
} |
|
|
|
|
|
.main-title { |
|
text-align: center; |
|
margin-bottom: 40px; |
|
padding: 0 20px; |
|
} |
|
|
|
.main-title h1 { |
|
font-size: clamp(36px, 8vw, 64px); |
|
font-weight: 900; |
|
margin: 0 0 16px 0; |
|
letter-spacing: -2px; |
|
color: var(--primary-color); |
|
position: relative; |
|
} |
|
|
|
.main-title h1::after { |
|
content: ''; |
|
position: absolute; |
|
bottom: -8px; |
|
left: 50%; |
|
transform: translateX(-50%); |
|
width: 60px; |
|
height: 4px; |
|
background: var(--primary-color); |
|
border-radius: 2px; |
|
} |
|
|
|
.main-title p { |
|
font-size: clamp(16px, 4vw, 22px); |
|
margin: 0 0 20px 0; |
|
color: var(--text-secondary); |
|
font-weight: 400; |
|
line-height: 1.5; |
|
max-width: 600px; |
|
margin-left: auto; |
|
margin-right: auto; |
|
} |
|
|
|
.aisheets-credit { |
|
font-size: 14px; |
|
color: var(--text-muted); |
|
margin-top: 16px; |
|
font-weight: 500; |
|
} |
|
|
|
.aisheets-credit a { |
|
color: var(--primary-color); |
|
text-decoration: none; |
|
font-weight: 600; |
|
transition: all 0.3s ease; |
|
padding: 4px 8px; |
|
border-radius: 6px; |
|
background: var(--primary-light); |
|
} |
|
|
|
.aisheets-credit a:hover { |
|
color: var(--primary-dark); |
|
background: rgba(14, 165, 233, 0.15); |
|
transform: translateY(-1px); |
|
} |
|
|
|
|
|
.stats-header { |
|
background: var(--card-bg); |
|
color: var(--text-primary); |
|
padding: 40px 30px; |
|
text-align: center; |
|
margin-bottom: 40px; |
|
border-radius: var(--border-radius-lg); |
|
box-shadow: var(--shadow-xl); |
|
border: 1px solid rgba(255,255,255,0.8); |
|
backdrop-filter: blur(20px); |
|
} |
|
|
|
.filter-section { |
|
background: var(--card-bg); |
|
padding: 20px 30px; |
|
margin-bottom: 30px; |
|
border-radius: var(--border-radius-lg); |
|
box-shadow: var(--shadow-xl); |
|
border: 1px solid rgba(255,255,255,0.8); |
|
backdrop-filter: blur(20px); |
|
} |
|
|
|
.filter-controls { |
|
display: flex; |
|
justify-content: center; |
|
gap: 15px; |
|
flex-wrap: wrap; |
|
align-items: center; |
|
} |
|
|
|
.filter-btn { |
|
background: #f8fafc; |
|
border: 2px solid #e5e7eb; |
|
color: #374151; |
|
padding: 8px 16px; |
|
border-radius: 6px; |
|
cursor: pointer; |
|
font-weight: 500; |
|
transition: all 0.2s ease; |
|
} |
|
|
|
.filter-btn:hover { |
|
background: var(--primary-light); |
|
border-color: var(--primary-color); |
|
color: var(--primary-dark); |
|
} |
|
|
|
.filter-btn.active { |
|
background: var(--primary-color); |
|
border-color: var(--primary-color); |
|
color: white; |
|
} |
|
|
|
.filter-label { |
|
font-weight: 600; |
|
color: var(--text-primary); |
|
margin-right: 10px; |
|
} |
|
|
|
.stats-header p { |
|
font-size: 16px; |
|
margin: 0 0 30px 0; |
|
color: var(--text-secondary); |
|
font-weight: 500; |
|
line-height: 1.6; |
|
} |
|
|
|
.win-stats { |
|
display: flex; |
|
justify-content: center; |
|
gap: 30px; |
|
margin-top: 30px; |
|
flex-wrap: wrap; |
|
} |
|
|
|
.stat { |
|
font-size: 16px; |
|
background: var(--primary-color); |
|
padding: 20px 30px; |
|
border-radius: var(--border-radius-sm); |
|
color: white; |
|
box-shadow: var(--shadow-md); |
|
transition: all 0.3s ease; |
|
border: 1px solid rgba(255,255,255,0.2); |
|
} |
|
|
|
.stat:hover { |
|
transform: translateY(-2px); |
|
box-shadow: var(--shadow-lg); |
|
} |
|
|
|
.stat .model { |
|
font-weight: 600; |
|
display: block; |
|
margin-bottom: 8px; |
|
font-size: 14px; |
|
opacity: 0.9; |
|
} |
|
|
|
.stat .wins { |
|
color: #4ade80; |
|
font-weight: 800; |
|
font-size: 24px; |
|
text-shadow: 0 1px 2px rgba(0,0,0,0.1); |
|
} |
|
|
|
|
|
.toc-container { |
|
background: var(--card-bg); |
|
border-radius: var(--border-radius-lg); |
|
box-shadow: var(--shadow-xl); |
|
margin-bottom: 40px; |
|
padding: 30px; |
|
border: 1px solid rgba(255,255,255,0.8); |
|
backdrop-filter: blur(20px); |
|
} |
|
|
|
.toc-title { |
|
font-size: 20px; |
|
font-weight: 700; |
|
margin-bottom: 20px; |
|
color: var(--text-primary); |
|
text-align: center; |
|
cursor: pointer; |
|
user-select: none; |
|
display: flex; |
|
align-items: center; |
|
justify-content: center; |
|
gap: 12px; |
|
padding: 12px; |
|
border-radius: var(--border-radius-sm); |
|
transition: all 0.3s ease; |
|
} |
|
|
|
.toc-title:hover { |
|
background: var(--primary-light); |
|
color: var(--primary-color); |
|
transform: translateY(-1px); |
|
} |
|
|
|
.toc-content { |
|
transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1); |
|
overflow: hidden; |
|
} |
|
|
|
.toc-content.collapsed { |
|
max-height: 0; |
|
margin-bottom: 0; |
|
} |
|
|
|
.toc-content.expanded { |
|
max-height: none; |
|
margin-bottom: 20px; |
|
} |
|
|
|
.toc-grid { |
|
display: grid; |
|
grid-template-columns: repeat(auto-fill, minmax(320px, 1fr)); |
|
gap: 16px; |
|
} |
|
|
|
.toc-item { |
|
background: linear-gradient(135deg, #f8fafc 0%, #f1f5f9 100%); |
|
border: 1px solid var(--border-color); |
|
border-radius: var(--border-radius-sm); |
|
padding: 20px; |
|
cursor: pointer; |
|
transition: all 0.3s ease; |
|
text-decoration: none; |
|
color: inherit; |
|
position: relative; |
|
overflow: hidden; |
|
} |
|
|
|
.toc-item::before { |
|
content: ''; |
|
position: absolute; |
|
top: 0; |
|
left: 0; |
|
right: 0; |
|
height: 3px; |
|
background: var(--primary-color); |
|
transform: scaleX(0); |
|
transition: transform 0.3s ease; |
|
} |
|
|
|
.toc-item:hover { |
|
background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%); |
|
border-color: var(--primary-color); |
|
transform: translateY(-4px); |
|
box-shadow: var(--shadow-lg); |
|
} |
|
|
|
.toc-item:hover::before { |
|
transform: scaleX(1); |
|
} |
|
|
|
.toc-number { |
|
font-weight: 700; |
|
color: var(--primary-color); |
|
margin-right: 12px; |
|
font-size: 16px; |
|
} |
|
|
|
.toc-description { |
|
font-size: 15px; |
|
line-height: 1.5; |
|
display: -webkit-box; |
|
-webkit-line-clamp: 2; |
|
-webkit-box-orient: vertical; |
|
overflow: hidden; |
|
font-weight: 500; |
|
} |
|
|
|
|
|
.app-section { |
|
margin-bottom: 40px; |
|
background: var(--card-bg); |
|
border-radius: var(--border-radius-lg); |
|
box-shadow: var(--shadow-xl); |
|
overflow: hidden; |
|
border: 1px solid rgba(255,255,255,0.8); |
|
backdrop-filter: blur(20px); |
|
transition: all 0.3s ease; |
|
} |
|
|
|
.app-section:hover { |
|
transform: translateY(-2px); |
|
box-shadow: 0 25px 50px rgba(0,0,0,0.15), 0 10px 25px rgba(0,0,0,0.1); |
|
} |
|
|
|
.description-header { |
|
background: #f8fafc; |
|
color: #374151; |
|
padding: 16px 20px; |
|
text-align: left; |
|
font-size: 16px; |
|
font-weight: 600; |
|
border-bottom: 1px solid #e5e7eb; |
|
cursor: pointer; |
|
user-select: none; |
|
transition: background-color 0.2s ease; |
|
} |
|
|
|
.description-header:hover { |
|
background: #f1f5f9; |
|
} |
|
|
|
|
|
.evaluation-section { |
|
background: #ffffff; |
|
border-bottom: 1px solid #e5e7eb; |
|
padding: 8px 20px; |
|
} |
|
|
|
.evaluations-container { |
|
display: none; |
|
} |
|
|
|
.evaluations-container.expanded { |
|
display: block; |
|
} |
|
|
|
.evaluation-result { |
|
background: #f9fafb; |
|
border: 1px solid #e5e7eb; |
|
border-radius: 6px; |
|
padding: 12px 16px; |
|
margin-bottom: 8px; |
|
} |
|
|
|
.eval-label { |
|
font-size: 11px; |
|
color: #6b7280; |
|
margin-bottom: 4px; |
|
font-weight: 500; |
|
text-transform: uppercase; |
|
letter-spacing: 0.5px; |
|
} |
|
|
|
.winner { |
|
color: #374151; |
|
font-weight: 600; |
|
margin-bottom: 4px; |
|
font-size: 14px; |
|
} |
|
|
|
.reason { |
|
color: #374151; |
|
font-weight: 500; |
|
} |
|
|
|
.view-eval-btn { |
|
background: #ffffff; |
|
color: #374151; |
|
border: 1px solid #d1d5db; |
|
padding: 4px 12px; |
|
border-radius: 4px; |
|
cursor: pointer; |
|
margin-top: 8px; |
|
font-size: 11px; |
|
font-weight: 500; |
|
transition: all 0.2s ease; |
|
} |
|
|
|
.view-eval-btn:hover { |
|
background: #f3f4f6; |
|
border-color: #9ca3af; |
|
} |
|
|
|
.full-evaluation { |
|
background: #ffffff; |
|
border: 1px solid #e5e7eb; |
|
border-radius: 6px; |
|
padding: 12px 16px; |
|
margin-top: 8px; |
|
display: none; |
|
} |
|
|
|
.thinking-content { |
|
max-height: 200px; |
|
overflow-y: auto; |
|
font-size: 12px; |
|
line-height: 1.5; |
|
white-space: pre-wrap; |
|
text-align: left; |
|
color: #374151; |
|
font-weight: 400; |
|
} |
|
|
|
|
|
.implementations { |
|
display: grid; |
|
grid-template-columns: 1fr 1fr; |
|
gap: 0; |
|
} |
|
|
|
.impl-panel { |
|
border-right: 1px solid var(--border-color); |
|
position: relative; |
|
} |
|
|
|
.impl-panel:last-child { |
|
border-right: none; |
|
} |
|
|
|
.impl-header { |
|
background: var(--secondary-color); |
|
color: white; |
|
padding: 16px 20px; |
|
font-weight: 700; |
|
text-align: center; |
|
font-size: 16px; |
|
letter-spacing: -0.2px; |
|
position: relative; |
|
} |
|
|
|
.impl-header::after { |
|
content: ''; |
|
position: absolute; |
|
bottom: 0; |
|
left: 0; |
|
right: 0; |
|
height: 1px; |
|
background: linear-gradient(90deg, transparent, rgba(255,255,255,0.3), transparent); |
|
} |
|
|
|
.iframe-container { |
|
height: 600px; |
|
background: #f8fafc; |
|
position: relative; |
|
} |
|
|
|
.iframe-container::before { |
|
content: ''; |
|
position: absolute; |
|
top: 0; |
|
left: 0; |
|
right: 0; |
|
height: 1px; |
|
background: linear-gradient(90deg, transparent, var(--border-color), transparent); |
|
} |
|
|
|
iframe { |
|
width: 100%; |
|
height: 100%; |
|
border: none; |
|
transform: scale(1); |
|
transform-origin: top left; |
|
} |
|
|
|
.error { |
|
color: #ef4444; |
|
text-align: center; |
|
padding: 40px 20px; |
|
font-weight: 600; |
|
} |
|
|
|
|
|
@media (min-width: 1400px) { |
|
.iframe-container { height: 700px; } |
|
.container { padding: 30px; } |
|
} |
|
|
|
@media (max-width: 1200px) { |
|
.iframe-container { height: 500px; } |
|
.win-stats { gap: 20px; } |
|
.stat { padding: 16px 24px; } |
|
} |
|
|
|
@media (max-width: 768px) { |
|
.implementations { grid-template-columns: 1fr; } |
|
.impl-panel { border-right: none; border-bottom: 1px solid var(--border-color); } |
|
.impl-panel:last-child { border-bottom: none; } |
|
.iframe-container { height: 400px; } |
|
.container { padding: 15px; } |
|
.description-header { padding: 20px 15px; font-size: 16px; } |
|
.toc-grid { grid-template-columns: 1fr; } |
|
.win-stats { gap: 15px; } |
|
.stat { padding: 15px 20px; } |
|
.main-title h1 { font-size: 48px; } |
|
.main-title p { font-size: 18px; } |
|
} |
|
|
|
|
|
::-webkit-scrollbar { |
|
width: 8px; |
|
} |
|
|
|
::-webkit-scrollbar-track { |
|
background: #f1f5f9; |
|
border-radius: 4px; |
|
} |
|
|
|
::-webkit-scrollbar-thumb { |
|
background: var(--primary-color); |
|
border-radius: 4px; |
|
} |
|
|
|
::-webkit-scrollbar-thumb:hover { |
|
background: var(--secondary-color); |
|
} |
|
</style> |
|
</head> |
|
<body> |
|
<!-- Primary content landmark; the script replaces the placeholder below once the dataset has loaded. -->
<main class="container">
  <div id="apps-container" class="loading">Loading apps from Hugging Face...</div>
</main>
|
|
|
<script> |
|
/**
 * Extracts a verdict from a judge's free-text evaluation.
 *
 * Preferred format is a "chosen: ..." line (optionally with a "reason: ..."
 * line); only the first line after each label is captured. When no "chosen:"
 * label exists, the text is scanned for "model 1" and then "model 2", and the
 * whole text is used as the reason.
 *
 * @param {string} evalText Raw evaluation text.
 * @returns {{winner: string, reason: string, fullEval: string} | null}
 *   The parsed verdict, or null when the text is empty, has no recognizable
 *   verdict, or parsing throws.
 */
function parseEvaluation(evalText) {
  if (!evalText) {
    return null;
  }

  // Checked in order: a text mentioning both models resolves to the first entry.
  const fallbackWinners = [
    [/model\s*1/i, 'Qwen3-Coder-480B-A35B-Instruct'],
    [/model\s*2/i, 'gpt-oss-120b'],
  ];

  try {
    const chosen = evalText.match(/chosen:\s*(.+?)(?:\n|$)/i);
    if (chosen) {
      const reason = evalText.match(/reason:\s*(.+?)(?:\n|$)/is);
      return {
        winner: chosen[1].trim(),
        reason: reason ? reason[1].trim() : '',
        fullEval: evalText,
      };
    }

    for (const [pattern, modelName] of fallbackWinners) {
      if (evalText.match(pattern)) {
        return { winner: modelName, reason: evalText, fullEval: evalText };
      }
    }
    return null;
  } catch (e) {
    console.error('Error parsing evaluation:', e);
    return null;
  }
}
|
|
|
/**
 * Builds the HTML for one judge's verdict: judge label, normalized winner
 * name, and a collapsible panel holding the judge's reason.
 *
 * The winner and reason come from model-generated text, so both are
 * HTML-escaped before interpolation; otherwise a reason mentioning e.g.
 * "<canvas>" would be parsed as markup.
 *
 * @param {{winner: string, reason: string}} evaluation Parsed verdict.
 * @param {number} index Zero-based app index, used to build unique element ids.
 * @param {string} [evalType='qwen'] Judge key: 'qwen', 'gpt', or anything else (rendered as Kimi).
 * @returns {string} HTML fragment.
 */
function createEvaluationSection(evaluation, index, evalType = 'qwen') {
  const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const escapeHtml = (value) =>
    String(value == null ? '' : value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);

  // Normalize the many ways a judge may name the winner to the canonical model names.
  const verdict = evaluation.winner.toLowerCase();
  let winner = evaluation.winner;
  if (verdict.includes('qwen') || verdict.includes('model 1')) {
    winner = 'Qwen3-Coder-480B-A35B-Instruct';
  } else if (verdict.includes('gpt') || verdict.includes('model 2')) {
    winner = 'gpt-oss-120b';
  }

  let judgeLabel = 'Kimi judge';
  if (evalType === 'qwen') {
    judgeLabel = 'Qwen3-Coder judge';
  } else if (evalType === 'gpt') {
    judgeLabel = 'GPT-OSS judge';
  }

  const panelId = `full-eval-${index}-${evalType}`;

  return `
    <div class="evaluation-section">
      <div class="eval-label">${judgeLabel}</div>
      <div class="evaluation-result">
        <div class="winner">🏆 Winner: ${escapeHtml(winner)}</div>
        <button type="button" class="view-eval-btn" onclick="toggleFullEval('${panelId}')">View Reason</button>
      </div>
      <div class="full-evaluation" id="${panelId}">
        <div class="thinking-content">${escapeHtml(evaluation.reason)}</div>
      </div>
    </div>
  `;
}
|
|
|
/**
 * Builds the HTML summary of the majority vote across up to three judges.
 *
 * Every non-null evaluation counts as a judge; a verdict that names neither
 * model still counts toward the judge total but adds no vote.
 *
 * @param {?{winner: string}} qwenEvaluation Verdict from the Qwen judge, if any.
 * @param {?{winner: string}} gptEvaluation Verdict from the GPT-OSS judge, if any.
 * @param {?{winner: string}} kimiEvaluation Verdict from the Kimi judge, if any.
 * @param {number} index App index (not used in the generated markup).
 * @returns {string} HTML fragment, or '' when no judge produced a verdict.
 */
function createMajorityVoteSection(qwenEvaluation, gptEvaluation, kimiEvaluation, index) {
  let qwenVotes = 0;
  let gptOssVotes = 0;
  let totalVotes = 0;

  for (const verdict of [qwenEvaluation, gptEvaluation, kimiEvaluation]) {
    if (!verdict) {
      continue;
    }
    totalVotes += 1;
    const name = verdict.winner.toLowerCase();
    if (name.includes('qwen') || name.includes('model 1')) {
      qwenVotes += 1;
    } else if (name.includes('gpt') || name.includes('model 2')) {
      gptOssVotes += 1;
    }
  }

  if (totalVotes === 0) {
    return '';
  }

  let majorityWinner = 'Tie';
  if (qwenVotes > gptOssVotes) {
    majorityWinner = 'Qwen3-Coder-480B-A35B-Instruct';
  } else if (gptOssVotes > qwenVotes) {
    majorityWinner = 'gpt-oss-120b';
  }
  const voteBreakdown = `Qwen: ${qwenVotes} | GPT-OSS: ${gptOssVotes}`;

  return `
    <div class="evaluation-section">
      <div class="eval-label">Majority Vote (${totalVotes} judges)</div>
      <div class="evaluation-result" style="background: #f0f9ff; border-color: #0ea5e9;">
        <div class="winner" style="color: #0c4a6e;">🏆 Final Winner: ${majorityWinner}</div>
        <div style="font-size: 11px; color: #0c4a6e; margin-top: 4px; font-weight: 500;">Vote Breakdown: ${voteBreakdown}</div>
      </div>
    </div>
  `;
}
|
|
|
/**
 * Aggregates judge verdicts over all dataset rows.
 *
 * For every row, each of the three judge columns is parsed with
 * parseEvaluation. Per-judge win counts are accumulated, and a per-row
 * majority (or tie) is recorded for rows where at least one judge produced a
 * parseable verdict. Rates are whole-number percentages, 0 when the
 * denominator is 0.
 *
 * @param {Array<{row: Object}>} rows Dataset rows; each `row` may hold the
 *   'eval-qwen-coder', 'eval-gpt-oss' and 'eval-kimi' text columns.
 * @returns {Object} Win counts, win rates and evaluated totals, overall and per judge.
 */
function calculateWinRates(rows) {
  // One running tally per judge column, in the order Qwen, GPT-OSS, Kimi.
  const tallies = ['eval-qwen-coder', 'eval-gpt-oss', 'eval-kimi'].map((column) => ({
    column,
    qwenWins: 0,
    gptOssWins: 0,
    evaluated: 0,
  }));

  const overall = { qwen: 0, gptOss: 0, ties: 0, evaluated: 0 };

  const toPercent = (wins, total) => (total > 0 ? Math.round((wins / total) * 100) : 0);

  for (const entry of rows) {
    let qwenVotes = 0;
    let gptOssVotes = 0;
    let totalVotes = 0;

    for (const tally of tallies) {
      const verdict = parseEvaluation(entry.row[tally.column] || '');
      if (!verdict) {
        continue;
      }
      tally.evaluated += 1;
      totalVotes += 1;

      const name = verdict.winner.toLowerCase();
      if (name.includes('qwen') || name.includes('model 1')) {
        tally.qwenWins += 1;
        qwenVotes += 1;
      } else if (name.includes('gpt') || name.includes('model 2')) {
        tally.gptOssWins += 1;
        gptOssVotes += 1;
      }
    }

    // Rows with no parseable verdict at all do not count as evaluated.
    if (totalVotes === 0) {
      continue;
    }
    overall.evaluated += 1;
    if (qwenVotes > gptOssVotes) {
      overall.qwen += 1;
    } else if (gptOssVotes > qwenVotes) {
      overall.gptOss += 1;
    } else {
      overall.ties += 1;
    }
  }

  const [byQwenJudge, byGptJudge, byKimiJudge] = tallies;

  return {
    qwenMajorityWins: overall.qwen,
    gptOssMajorityWins: overall.gptOss,
    ties: overall.ties,
    qwenMajorityRate: toPercent(overall.qwen, overall.evaluated),
    gptOssMajorityRate: toPercent(overall.gptOss, overall.evaluated),
    qwenWinsQwenJudge: byQwenJudge.qwenWins,
    gptOssWinsQwenJudge: byQwenJudge.gptOssWins,
    qwenWinsGptJudge: byGptJudge.qwenWins,
    gptOssWinsGptJudge: byGptJudge.gptOssWins,
    qwenWinsKimiJudge: byKimiJudge.qwenWins,
    gptOssWinsKimiJudge: byKimiJudge.gptOssWins,
    qwenRateQwenJudge: toPercent(byQwenJudge.qwenWins, byQwenJudge.evaluated),
    gptOssRateQwenJudge: toPercent(byQwenJudge.gptOssWins, byQwenJudge.evaluated),
    qwenRateGptJudge: toPercent(byGptJudge.qwenWins, byGptJudge.evaluated),
    gptOssRateGptJudge: toPercent(byGptJudge.gptOssWins, byGptJudge.evaluated),
    qwenRateKimiJudge: toPercent(byKimiJudge.qwenWins, byKimiJudge.evaluated),
    gptOssRateKimiJudge: toPercent(byKimiJudge.gptOssWins, byKimiJudge.evaluated),
    totalEvaluated: overall.evaluated,
    totalEvaluatedQwen: byQwenJudge.evaluated,
    totalEvaluatedGpt: byGptJudge.evaluated,
    totalEvaluatedKimi: byKimiJudge.evaluated,
  };
}
|
|
|
/**
 * Shows or hides a judge's full-reason panel by flipping its inline
 * `display` between 'block' and 'none'.
 *
 * @param {string} id Element id of the panel to toggle.
 */
function toggleFullEval(id) {
  const panel = document.getElementById(id);
  const isVisible = panel.style.display === 'block';
  panel.style.display = isVisible ? 'none' : 'block';
}
|
|
|
/**
 * Expands or collapses the per-judge evaluations of one app and updates the
 * header's "View Details ▼" / "Hide Details ▲" hint accordingly.
 *
 * The header text is rebuilt from its `data-description` attribute using
 * text nodes rather than innerHTML: the description comes from the dataset
 * and must never be re-parsed as markup.
 *
 * @param {number} index Zero-based app index; the elements are looked up as
 *   `evaluations-<index>` and `header-<index>`.
 */
function toggleEvaluations(index) {
  const evaluationsContainer = document.getElementById(`evaluations-${index}`);
  const header = document.getElementById(`header-${index}`);
  if (!evaluationsContainer || !header) {
    return;
  }

  // toggle() returns true when the class is present after the call.
  const isExpanded = evaluationsContainer.classList.toggle('expanded');

  const hint = document.createElement('span');
  hint.style.cssText = 'float: right; font-size: 12px; color: #6b7280; font-weight: 400;';
  hint.textContent = isExpanded ? 'Hide Details ▲' : 'View Details ▼';

  // Assigning textContent drops the previous text and hint in one step.
  header.textContent = `${index + 1}. ${header.getAttribute('data-description')} `;
  header.appendChild(hint);
}
|
|
|
/**
 * Shows only the app sections whose majority winner matches the filter and
 * marks the corresponding filter button as active.
 *
 * Matching is done against the first line of each `.winner-display` (the
 * "Winner: ..." line) only. The second line is the vote breakdown, which
 * always contains the text "GPT-OSS", so matching the whole block would make
 * the 'gpt' filter select every section.
 *
 * @param {string} winner One of 'all', 'qwen', 'gpt' or 'tie'. Any other
 *   value is ignored.
 */
function filterByWinner(winner) {
  // Lower-cased substring expected in the winner line for each filter key.
  const winnerNeedles = new Map([
    ['qwen', 'qwen3-coder'],
    ['gpt', 'gpt-oss'],
    ['tie', 'tie'],
  ]);

  const showAll = winner === 'all';
  if (!showAll && !winnerNeedles.has(winner)) {
    return;
  }

  document.querySelectorAll('.filter-btn').forEach((btn) => btn.classList.remove('active'));
  const activeBtn = document.querySelector(`[data-filter="${winner}"]`);
  if (activeBtn) {
    activeBtn.classList.add('active');
  }

  document.querySelectorAll('.app-section').forEach((section) => {
    const winnerDisplay = section.querySelector('.winner-display');
    if (!winnerDisplay) {
      return;
    }
    const winnerLine = winnerDisplay.firstElementChild || winnerDisplay;
    const winnerText = winnerLine.textContent.toLowerCase();
    const matches = showAll || winnerText.includes(winnerNeedles.get(winner));
    section.style.display = matches ? 'block' : 'none';
  });
}
|
|
|
/**
 * Smooth-scrolls the page so the section of the given app is at the top of
 * the viewport. Does nothing when no such section exists.
 *
 * @param {number} index Zero-based app index; the section id is `app-<index>`.
 */
function scrollToApp(index) {
  const target = document.getElementById(`app-${index}`);
  if (!target) {
    return;
  }
  target.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
|
|
|
/**
 * Expands or collapses the "List of Apps" table of contents by swapping the
 * `collapsed` / `expanded` classes on #toc-content and flipping the arrow in
 * the #toc-title label.
 */
function toggleTOC() {
  const body = document.getElementById('toc-content');
  const label = document.getElementById('toc-title');

  const wasCollapsed = body.classList.contains('collapsed');
  const [stale, fresh] = wasCollapsed ? ['collapsed', 'expanded'] : ['expanded', 'collapsed'];

  body.classList.remove(stale);
  body.classList.add(fresh);
  label.innerHTML = wasCollapsed ? '📋 List of Apps ▼' : '📋 List of Apps ▶';
}
|
|
|
/**
 * Fetches the judged dataset rows from the Hugging Face datasets server and
 * renders the page: title, aggregate win statistics, winner filter, table of
 * contents, and one section per app with both model implementations side by
 * side in iframes.
 *
 * On a network error, a non-2xx response or an unexpected payload shape, the
 * loading placeholder is replaced by an error message and nothing else is
 * rendered.
 *
 * @returns {Promise<void>}
 */
async function loadAppsFromHuggingFace() {
  const ROWS_URL = 'https://datasets-server.huggingface.co/rows?dataset=dvilasuero/jsvibes-qwen-gpt-oss-judged&config=default&split=train&offset=0&length=50';
  const NO_DESCRIPTION = 'No description available';
  // Dataset column, evaluation-section key, and label used in log messages.
  const JUDGES = [
    { column: 'eval-qwen-coder', type: 'qwen', label: 'Qwen' },
    { column: 'eval-gpt-oss', type: 'gpt', label: 'GPT' },
    { column: 'eval-kimi', type: 'kimi', label: 'Kimi' },
  ];

  const container = document.getElementById('apps-container');

  const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  // Escapes dataset text for use in HTML content and double-quoted attributes.
  const escapeHtml = (value) =>
    String(value == null ? '' : value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);

  // Removes a wrapping ```html ... ``` Markdown fence from a model's answer.
  const stripHtmlFence = (code) => {
    const text = typeof code === 'string' ? code : '';
    if (!text.startsWith('```html')) {
      return text;
    }
    // \s* so a fence followed by trailing whitespace/newlines is stripped too.
    return text.replace(/^```html\n?/, '').replace(/```\s*$/, '');
  };

  // Counts the judges' votes for one app and derives the majority winner.
  const tallyVotes = (verdicts) => {
    let qwenVotes = 0;
    let gptOssVotes = 0;
    let totalVotes = 0;
    verdicts.forEach((verdict) => {
      if (!verdict) {
        return;
      }
      totalVotes += 1;
      const name = String(verdict.winner).toLowerCase();
      if (name.includes('qwen') || name.includes('model 1')) {
        qwenVotes += 1;
      } else if (name.includes('gpt') || name.includes('model 2')) {
        gptOssVotes += 1;
      }
    });

    let majorityWinner = 'Tie';
    if (qwenVotes > gptOssVotes) {
      majorityWinner = 'Qwen3-Coder-480B-A35B-Instruct';
    } else if (gptOssVotes > qwenVotes) {
      majorityWinner = 'gpt-oss-120b';
    }
    return {
      totalVotes,
      majorityWinner,
      voteBreakdown: `Qwen: ${qwenVotes} | GPT-OSS: ${gptOssVotes}`,
    };
  };

  let rows;
  let winStats;
  try {
    const response = await fetch(ROWS_URL);
    if (!response.ok) {
      throw new Error(`dataset request failed with HTTP ${response.status}`);
    }
    const data = await response.json();
    if (!data || !Array.isArray(data.rows)) {
      throw new Error('dataset response does not contain a "rows" array');
    }
    rows = data.rows;
    winStats = calculateWinRates(rows);
  } catch (err) {
    console.error('Failed to load apps:', err);
    container.className = 'error';
    container.textContent = `Failed to load apps from Hugging Face: ${err.message}`;
    return;
  }

  container.innerHTML = `
    <div class="main-title">
      <h1>JSVibes</h1>
      <p>Vibe testing open models for simple but useful (web) code tasks</p>
      <div class="aisheets-credit">
        Built with <a href="https://huggingface.co/spaces/aisheets/sheets" target="_blank" rel="noopener noreferrer">AISheets</a> |
        Dataset: <a href="https://huggingface.co/datasets/dvilasuero/jsvibes-qwen-gptoss" target="_blank" rel="noopener noreferrer">jsvibes-qwen-gptoss</a>
      </div>
    </div>
    <div class="stats-header">
      <p style="font-size: 14px; opacity: 0.8;">Automatically evaluated by Qwen3-Coder, GPT-OSS, and Kimi as judges. Results based on majority voting. Judgments are imperfect, test them yourself!</p>
      <div class="win-stats">
        <div class="stat">
          <span class="model">Qwen3-Coder-480B-A35B-Instruct</span>
          <span class="wins">${winStats.qwenMajorityWins} wins</span>
          <div style="font-size: 14px; opacity: 0.8;">Majority: ${winStats.qwenMajorityRate}% | Individual: Qwen: ${winStats.qwenRateQwenJudge}% | GPT: ${winStats.qwenRateGptJudge}% | Kimi: ${winStats.qwenRateKimiJudge}%</div>
        </div>
        <div class="stat">
          <span class="model">gpt-oss-120b</span>
          <span class="wins">${winStats.gptOssMajorityWins} wins</span>
          <div style="font-size: 14px; opacity: 0.8;">Majority: ${winStats.gptOssMajorityRate}% | Individual: Qwen: ${winStats.gptOssRateQwenJudge}% | GPT: ${winStats.gptOssRateGptJudge}% | Kimi: ${winStats.gptOssRateKimiJudge}%</div>
        </div>
        <div class="stat">
          <span class="model">Ties</span>
          <span class="wins">${winStats.ties}</span>
          <div style="font-size: 14px; opacity: 0.8;">Total: ${winStats.totalEvaluated}</div>
        </div>
      </div>
    </div>
    <div class="filter-section">
      <div class="filter-controls">
        <span class="filter-label">Filter by Winner:</span>
        <button type="button" class="filter-btn active" data-filter="all" onclick="filterByWinner('all')">All Apps</button>
        <button type="button" class="filter-btn" data-filter="qwen" onclick="filterByWinner('qwen')">Qwen Wins</button>
        <button type="button" class="filter-btn" data-filter="gpt" onclick="filterByWinner('gpt')">GPT-OSS Wins</button>
        <button type="button" class="filter-btn" data-filter="tie" onclick="filterByWinner('tie')">Ties</button>
      </div>
    </div>
    <div class="toc-container">
      <div class="toc-title" id="toc-title" onclick="toggleTOC()">📋 List of Apps ▶</div>
      <div class="toc-content collapsed" id="toc-content">
        <div class="toc-grid" id="toc-grid">
          <!-- TOC items will be populated here -->
        </div>
      </div>
    </div>
  `;

  // Table of contents: built from DOM nodes so descriptions are inserted as plain text.
  const tocGrid = document.getElementById('toc-grid');
  rows.forEach((row, index) => {
    const app = row.row || {};

    const number = document.createElement('span');
    number.className = 'toc-number';
    number.textContent = `#${index + 1}`;

    const description = document.createElement('span');
    description.className = 'toc-description';
    description.textContent = app.description || NO_DESCRIPTION;

    const wrapper = document.createElement('div');
    wrapper.append(number, ' ', description);

    const tocItem = document.createElement('div');
    tocItem.className = 'toc-item';
    tocItem.addEventListener('click', () => scrollToApp(index));
    tocItem.appendChild(wrapper);
    tocGrid.appendChild(tocItem);
  });

  // One section per app: header, majority winner, judge verdicts, both implementations.
  rows.forEach((row, index) => {
    const app = row.row || {};
    const description = escapeHtml(app.description || NO_DESCRIPTION);
    const qwenHtml = stripHtmlFence(app['qwen3-coder']);
    const gptOssHtml = stripHtmlFence(app['gpt-oss']);

    const verdicts = JUDGES.map(({ column, type, label }) => {
      const raw = app[column];
      const parsed = parseEvaluation(raw || '');
      if (!parsed && raw) {
        console.warn(`Failed to parse ${label} evaluation for app ${index}:`, raw);
      }
      return { type, parsed };
    });

    const { totalVotes, majorityWinner, voteBreakdown } = tallyVotes(verdicts.map((v) => v.parsed));

    const evaluationsHtml = verdicts
      .map(({ type, parsed }) => (parsed ? createEvaluationSection(parsed, index, type) : ''))
      .join('\n');

    const section = document.createElement('div');
    section.className = 'app-section';
    section.id = `app-${index}`;
    section.innerHTML = `
      <div class="description-header" id="header-${index}" onclick="toggleEvaluations(${index})" data-description="${description}">
        ${index + 1}. ${description} <span style="float: right; font-size: 12px; color: #6b7280; font-weight: 400;">View Details ▼</span>
      </div>
      <div class="winner-display" style="background: #f0f9ff; border: 1px solid #0ea5e9; padding: 8px 20px; margin: 0;">
        <div style="color: #0c4a6e; font-weight: 600; font-size: 14px;">🏆 Winner: ${majorityWinner}</div>
        <div style="color: #0c4a6e; font-size: 11px; margin-top: 2px;">Vote Breakdown: ${voteBreakdown} (${totalVotes} judges)</div>
      </div>
      <div class="evaluations-container" id="evaluations-${index}">
        ${evaluationsHtml}
      </div>
      <div class="implementations">
        <div class="impl-panel">
          <div class="impl-header">Qwen3-Coder-480B-A35B-Instruct</div>
          <div class="iframe-container">
            <iframe data-impl="qwen" title="Qwen3-Coder-480B-A35B-Instruct implementation of app ${index + 1}"></iframe>
          </div>
        </div>
        <div class="impl-panel">
          <div class="impl-header">gpt-oss-120b</div>
          <div class="iframe-container">
            <iframe data-impl="gpt-oss" title="gpt-oss-120b implementation of app ${index + 1}"></iframe>
          </div>
        </div>
      </div>
    `;

    // Assign the generated documents through the srcdoc property instead of
    // interpolating them into the attribute, so quotes and ampersands in the
    // generated HTML cannot truncate or corrupt it.
    // NOTE(review): srcdoc frames without a `sandbox` attribute share this
    // page's origin, so generated scripts can reach the parent document.
    // Consider sandboxing if the apps do not need same-origin storage.
    section.querySelector('iframe[data-impl="qwen"]').srcdoc = qwenHtml;
    section.querySelector('iframe[data-impl="gpt-oss"]').srcdoc = gptOssHtml;

    container.appendChild(section);
  });
}
|
|
|
|
|
// Start the initial render. If loading rejects before anything was rendered,
// replace the "Loading..." placeholder with an error instead of leaving it up forever.
loadAppsFromHuggingFace().catch((err) => {
  console.error('Failed to load apps:', err);
  const container = document.getElementById('apps-container');
  if (container && container.childElementCount === 0) {
    container.className = 'error';
    container.textContent = 'Failed to load apps from Hugging Face. Please try again later.';
  }
});
|
</script> |
|
</body> |
|
</html> |