dvilasuero HF Staff committed on
Commit
2e235dd
·
verified ·
1 Parent(s): c2b50e9

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +471 -128
index.html CHANGED
@@ -7,9 +7,15 @@
7
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800;900&display=swap');
8
 
9
  :root {
10
- --primary-gradient: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
11
- --secondary-gradient: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
12
- --accent-gradient: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
 
 
 
 
 
 
13
  --dark-bg: #0a0a0a;
14
  --card-bg: #ffffff;
15
  --text-primary: #1a1a1a;
@@ -20,9 +26,9 @@
20
  --shadow-md: 0 4px 6px rgba(0,0,0,0.07), 0 1px 3px rgba(0,0,0,0.06);
21
  --shadow-lg: 0 10px 25px rgba(0,0,0,0.1), 0 4px 10px rgba(0,0,0,0.06);
22
  --shadow-xl: 0 20px 40px rgba(0,0,0,0.1), 0 8px 20px rgba(0,0,0,0.08);
23
- --border-radius: 16px;
24
- --border-radius-sm: 12px;
25
- --border-radius-lg: 24px;
26
  }
27
 
28
  * {
@@ -67,10 +73,7 @@
67
  font-weight: 900;
68
  margin: 0 0 16px 0;
69
  letter-spacing: -2px;
70
- background: var(--primary-gradient);
71
- -webkit-background-clip: text;
72
- -webkit-text-fill-color: transparent;
73
- background-clip: text;
74
  position: relative;
75
  }
76
 
@@ -82,7 +85,7 @@
82
  transform: translateX(-50%);
83
  width: 60px;
84
  height: 4px;
85
- background: var(--accent-gradient);
86
  border-radius: 2px;
87
  }
88
 
@@ -105,18 +108,18 @@
105
  }
106
 
107
  .aisheets-credit a {
108
- color: #667eea;
109
  text-decoration: none;
110
  font-weight: 600;
111
  transition: all 0.3s ease;
112
  padding: 4px 8px;
113
  border-radius: 6px;
114
- background: rgba(102, 126, 234, 0.1);
115
  }
116
 
117
  .aisheets-credit a:hover {
118
- color: #764ba2;
119
- background: rgba(118, 75, 162, 0.1);
120
  transform: translateY(-1px);
121
  }
122
 
@@ -133,6 +136,53 @@
133
  backdrop-filter: blur(20px);
134
  }
135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  .stats-header p {
137
  font-size: 16px;
138
  margin: 0 0 30px 0;
@@ -151,7 +201,7 @@
151
 
152
  .stat {
153
  font-size: 16px;
154
- background: var(--primary-gradient);
155
  padding: 20px 30px;
156
  border-radius: var(--border-radius-sm);
157
  color: white;
@@ -209,8 +259,8 @@
209
  }
210
 
211
  .toc-title:hover {
212
- background: rgba(102, 126, 234, 0.1);
213
- color: #667eea;
214
  transform: translateY(-1px);
215
  }
216
 
@@ -255,14 +305,14 @@
255
  left: 0;
256
  right: 0;
257
  height: 3px;
258
- background: var(--accent-gradient);
259
  transform: scaleX(0);
260
  transition: transform 0.3s ease;
261
  }
262
 
263
  .toc-item:hover {
264
  background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%);
265
- border-color: #667eea;
266
  transform: translateY(-4px);
267
  box-shadow: var(--shadow-lg);
268
  }
@@ -273,7 +323,7 @@
273
 
274
  .toc-number {
275
  font-weight: 700;
276
- color: #667eea;
277
  margin-right: 12px;
278
  font-size: 16px;
279
  }
@@ -306,100 +356,102 @@
306
  }
307
 
308
  .description-header {
309
- background: var(--primary-gradient);
310
- color: white;
311
- padding: 25px 20px;
312
- text-align: center;
313
- font-size: 18px;
314
- font-weight: 700;
315
- letter-spacing: -0.3px;
316
- position: relative;
 
 
317
  }
318
 
319
- .description-header::after {
320
- content: '';
321
- position: absolute;
322
- bottom: 0;
323
- left: 0;
324
- right: 0;
325
- height: 1px;
326
- background: linear-gradient(90deg, transparent, rgba(255,255,255,0.3), transparent);
327
  }
328
 
329
  /* Evaluation Section */
330
  .evaluation-section {
331
- background: linear-gradient(135deg, #f8fafc 0%, #f1f5f9 100%);
332
- border-bottom: 1px solid var(--border-color);
333
- padding: 20px;
 
 
 
 
 
 
 
 
334
  }
335
 
336
  .evaluation-result {
337
- background: linear-gradient(135deg, #dcfce7 0%, #bbf7d0 100%);
338
- border: 1px solid #86efac;
339
- border-radius: var(--border-radius-sm);
340
- padding: 20px;
341
- margin-bottom: 15px;
342
- box-shadow: var(--shadow-sm);
343
  }
344
 
345
  .eval-label {
346
- font-size: 12px;
347
- color: var(--text-muted);
348
- margin-bottom: 8px;
349
- font-weight: 600;
350
  text-transform: uppercase;
351
  letter-spacing: 0.5px;
352
  }
353
 
354
  .winner {
355
- color: #166534;
356
- font-weight: 800;
357
- margin-bottom: 8px;
358
- font-size: 16px;
359
  }
360
 
361
  .reason {
362
- color: #166534;
363
  font-weight: 500;
364
  }
365
 
366
  .view-eval-btn {
367
- background: var(--primary-gradient);
368
- color: white;
369
- border: none;
370
- padding: 8px 16px;
371
- border-radius: var(--border-radius-sm);
372
  cursor: pointer;
373
- margin-top: 12px;
374
- font-size: 13px;
375
- font-weight: 600;
376
- transition: all 0.3s ease;
377
  }
378
 
379
  .view-eval-btn:hover {
380
- transform: translateY(-1px);
381
- box-shadow: var(--shadow-md);
382
  }
383
 
384
  .full-evaluation {
385
- background: linear-gradient(135deg, #fef3c7 0%, #fde68a 100%);
386
- border: 1px solid #fbbf24;
387
- border-radius: var(--border-radius-sm);
388
- padding: 20px;
389
- margin-top: 15px;
390
  display: none;
391
- box-shadow: var(--shadow-sm);
392
  }
393
 
394
  .thinking-content {
395
- max-height: 300px;
396
  overflow-y: auto;
397
- font-size: 14px;
398
- line-height: 1.6;
399
  white-space: pre-wrap;
400
  text-align: left;
401
- color: #92400e;
402
- font-weight: 500;
403
  }
404
 
405
  /* Implementation Panels */
@@ -419,7 +471,7 @@
419
  }
420
 
421
  .impl-header {
422
- background: var(--secondary-gradient);
423
  color: white;
424
  padding: 16px 20px;
425
  font-weight: 700;
@@ -507,12 +559,12 @@
507
  }
508
 
509
  ::-webkit-scrollbar-thumb {
510
- background: var(--primary-gradient);
511
  border-radius: 4px;
512
  }
513
 
514
  ::-webkit-scrollbar-thumb:hover {
515
- background: var(--secondary-gradient);
516
  }
517
  </style>
518
  </head>
@@ -538,6 +590,24 @@
538
  };
539
  }
540
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
541
  return null;
542
  } catch (e) {
543
  console.error('Error parsing evaluation:', e);
@@ -545,61 +615,208 @@
545
  }
546
  }
547
 
548
- function createEvaluationSection(evaluation, index) {
549
- const winner = evaluation.winner.toLowerCase().includes('kimi') ? 'Kimi-K2' :
550
- evaluation.winner.toLowerCase().includes('qwen') ? 'Qwen3-Coder' :
551
  evaluation.winner;
552
 
 
 
 
 
553
  return `
554
  <div class="evaluation-section">
555
- <div class="eval-label">(Kimi-K2 judge)</div>
556
  <div class="evaluation-result">
557
  <div class="winner">🏆 Winner: ${winner}</div>
558
- <button class="view-eval-btn" onclick="toggleFullEval(${index})">View Reason</button>
559
  </div>
560
- <div class="full-evaluation" id="full-eval-${index}">
561
  <div class="thinking-content">${evaluation.reason}</div>
562
  </div>
563
  </div>
564
  `;
565
  }
566
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
567
  function calculateWinRates(rows) {
568
- let kimiWins = 0;
569
- let qwenWins = 0;
570
  let ties = 0;
571
  let totalEvaluated = 0;
572
 
 
 
 
 
 
 
 
 
 
 
 
573
  rows.forEach(row => {
574
- const evaluation = parseEvaluation(row.row['r1-evaluation'] || '');
575
- if (evaluation) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
576
  totalEvaluated++;
577
- const winner = evaluation.winner.toLowerCase();
578
- if (winner.includes('kimi')) {
579
- kimiWins++;
580
- } else if (winner.includes('qwen')) {
581
- qwenWins++;
582
  } else {
583
  ties++;
584
  }
585
  }
586
  });
587
 
588
- const kimiRate = totalEvaluated > 0 ? Math.round((kimiWins / totalEvaluated) * 100) : 0;
589
- const qwenRate = totalEvaluated > 0 ? Math.round((qwenWins / totalEvaluated) * 100) : 0;
 
 
 
 
 
 
 
590
 
591
  return {
592
- kimi: kimiWins,
593
- qwen: qwenWins,
594
  ties: ties,
595
- kimiRate: kimiRate,
596
- qwenRate: qwenRate,
597
- total: totalEvaluated
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
598
  };
599
  }
600
 
601
- function toggleFullEval(index) {
602
- const fullEval = document.getElementById(`full-eval-${index}`);
603
 
604
  if (fullEval.style.display === 'block') {
605
  fullEval.style.display = 'none';
@@ -608,6 +825,46 @@
608
  }
609
  }
610
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
611
  function scrollToApp(index) {
612
  const appSection = document.getElementById(`app-${index}`);
613
  if (appSection) {
@@ -634,7 +891,7 @@
634
  async function loadAppsFromHuggingFace() {
635
  const container = document.getElementById('apps-container');
636
 
637
- const response = await fetch('https://datasets-server.huggingface.co/rows?dataset=dvilasuero/JSVibes&config=default&split=train&offset=0&length=50');
638
  const data = await response.json();
639
 
640
  // Calculate win rates
@@ -645,28 +902,39 @@
645
  <h1>JSVibes</h1>
646
  <p>Vibe testing open models for simple but useful (web) code tasks</p>
647
  <div class="aisheets-credit">
648
- Built with <a href="https://huggingface.co/spaces/aisheets/sheets" target="_blank">AISheets</a>
 
649
  </div>
650
  </div>
651
  <div class="stats-header">
652
- <p style="font-size: 14px; opacity: 0.8;">Automatically evaluated by Kimi K2 as a judge. Judgments are imperfect, test them yourself!</p>
653
  <div class="win-stats">
654
  <div class="stat">
655
- <span class="model">Kimi-K2</span>
656
- <span class="wins">${winStats.kimi} wins</span>
657
- <div style="font-size: 14px; opacity: 0.8;">${winStats.kimiRate}%</div>
658
  </div>
659
  <div class="stat">
660
- <span class="model">Qwen3-Coder</span>
661
- <span class="wins">${winStats.qwen} wins</span>
662
- <div style="font-size: 14px; opacity: 0.8;">${winStats.qwenRate}%</div>
663
  </div>
664
  <div class="stat">
665
  <span class="model">Ties</span>
666
  <span class="wins">${winStats.ties}</span>
 
667
  </div>
668
  </div>
669
  </div>
 
 
 
 
 
 
 
 
 
670
  <div class="toc-container">
671
  <div class="toc-title" id="toc-title" onclick="toggleTOC()">📋 List of Apps ▶</div>
672
  <div class="toc-content collapsed" id="toc-content">
@@ -697,41 +965,116 @@
697
  const app = row.row;
698
 
699
  // Clean HTML content by removing markdown code blocks
700
- let kimiHtml = app['kimi-k2'] || '';
701
  let qwenHtml = app['qwen3-coder'] || '';
 
702
 
703
- if (kimiHtml.startsWith('```html')) {
704
- kimiHtml = kimiHtml.replace(/```html\n?/, '').replace(/```$/, '');
705
- }
706
  if (qwenHtml.startsWith('```html')) {
707
  qwenHtml = qwenHtml.replace(/```html\n?/, '').replace(/```$/, '');
708
  }
 
 
 
 
 
 
 
 
709
 
710
- // Parse evaluation data
711
- const evaluation = parseEvaluation(app['r1-evaluation'] || '');
712
- if (!evaluation && app['r1-evaluation']) {
713
- console.log(`Failed to parse evaluation for app ${index}:`, app['r1-evaluation']);
 
 
 
 
 
 
 
 
 
 
 
 
 
714
  }
715
 
716
  const section = document.createElement('div');
717
  section.className = 'app-section';
718
  section.id = `app-${index}`;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
719
  section.innerHTML = `
720
- <div class="description-header">
721
- ${index + 1}. ${app.description || 'No description available'}
 
 
 
 
 
 
 
 
 
722
  </div>
723
- ${evaluation ? createEvaluationSection(evaluation, index) : ''}
724
  <div class="implementations">
725
  <div class="impl-panel">
726
- <div class="impl-header">Kimi-K2</div>
727
  <div class="iframe-container">
728
- <iframe srcdoc="${kimiHtml.replace(/"/g, '&quot;')}"></iframe>
729
  </div>
730
  </div>
731
  <div class="impl-panel">
732
- <div class="impl-header">Qwen3-Coder</div>
733
  <div class="iframe-container">
734
- <iframe srcdoc="${qwenHtml.replace(/"/g, '&quot;')}"></iframe>
735
  </div>
736
  </div>
737
  </div>
 
7
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800;900&display=swap');
8
 
9
  :root {
10
+ --primary-color: #0ea5e9;
11
+ --primary-dark: #0c4a6e;
12
+ --primary-light: #f0f9ff;
13
+ --secondary-color: #059669;
14
+ --secondary-dark: #064e3b;
15
+ --secondary-light: #ecfdf5;
16
+ --accent-color: #7c3aed;
17
+ --accent-dark: #4c1d95;
18
+ --accent-light: #f3f4f6;
19
  --dark-bg: #0a0a0a;
20
  --card-bg: #ffffff;
21
  --text-primary: #1a1a1a;
 
26
  --shadow-md: 0 4px 6px rgba(0,0,0,0.07), 0 1px 3px rgba(0,0,0,0.06);
27
  --shadow-lg: 0 10px 25px rgba(0,0,0,0.1), 0 4px 10px rgba(0,0,0,0.06);
28
  --shadow-xl: 0 20px 40px rgba(0,0,0,0.1), 0 8px 20px rgba(0,0,0,0.08);
29
+ --border-radius: 8px;
30
+ --border-radius-sm: 6px;
31
+ --border-radius-lg: 12px;
32
  }
33
 
34
  * {
 
73
  font-weight: 900;
74
  margin: 0 0 16px 0;
75
  letter-spacing: -2px;
76
+ color: var(--primary-color);
 
 
 
77
  position: relative;
78
  }
79
 
 
85
  transform: translateX(-50%);
86
  width: 60px;
87
  height: 4px;
88
+ background: var(--primary-color);
89
  border-radius: 2px;
90
  }
91
 
 
108
  }
109
 
110
  .aisheets-credit a {
111
+ color: var(--primary-color);
112
  text-decoration: none;
113
  font-weight: 600;
114
  transition: all 0.3s ease;
115
  padding: 4px 8px;
116
  border-radius: 6px;
117
+ background: var(--primary-light);
118
  }
119
 
120
  .aisheets-credit a:hover {
121
+ color: var(--primary-dark);
122
+ background: rgba(14, 165, 233, 0.15);
123
  transform: translateY(-1px);
124
  }
125
 
 
136
  backdrop-filter: blur(20px);
137
  }
138
 
139
+ .filter-section {
140
+ background: var(--card-bg);
141
+ padding: 20px 30px;
142
+ margin-bottom: 30px;
143
+ border-radius: var(--border-radius-lg);
144
+ box-shadow: var(--shadow-xl);
145
+ border: 1px solid rgba(255,255,255,0.8);
146
+ backdrop-filter: blur(20px);
147
+ }
148
+
149
+ .filter-controls {
150
+ display: flex;
151
+ justify-content: center;
152
+ gap: 15px;
153
+ flex-wrap: wrap;
154
+ align-items: center;
155
+ }
156
+
157
+ .filter-btn {
158
+ background: #f8fafc;
159
+ border: 2px solid #e5e7eb;
160
+ color: #374151;
161
+ padding: 8px 16px;
162
+ border-radius: 6px;
163
+ cursor: pointer;
164
+ font-weight: 500;
165
+ transition: all 0.2s ease;
166
+ }
167
+
168
+ .filter-btn:hover {
169
+ background: var(--primary-light);
170
+ border-color: var(--primary-color);
171
+ color: var(--primary-dark);
172
+ }
173
+
174
+ .filter-btn.active {
175
+ background: var(--primary-color);
176
+ border-color: var(--primary-color);
177
+ color: white;
178
+ }
179
+
180
+ .filter-label {
181
+ font-weight: 600;
182
+ color: var(--text-primary);
183
+ margin-right: 10px;
184
+ }
185
+
186
  .stats-header p {
187
  font-size: 16px;
188
  margin: 0 0 30px 0;
 
201
 
202
  .stat {
203
  font-size: 16px;
204
+ background: var(--primary-color);
205
  padding: 20px 30px;
206
  border-radius: var(--border-radius-sm);
207
  color: white;
 
259
  }
260
 
261
  .toc-title:hover {
262
+ background: var(--primary-light);
263
+ color: var(--primary-color);
264
  transform: translateY(-1px);
265
  }
266
 
 
305
  left: 0;
306
  right: 0;
307
  height: 3px;
308
+ background: var(--primary-color);
309
  transform: scaleX(0);
310
  transition: transform 0.3s ease;
311
  }
312
 
313
  .toc-item:hover {
314
  background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%);
315
+ border-color: var(--primary-color);
316
  transform: translateY(-4px);
317
  box-shadow: var(--shadow-lg);
318
  }
 
323
 
324
  .toc-number {
325
  font-weight: 700;
326
+ color: var(--primary-color);
327
  margin-right: 12px;
328
  font-size: 16px;
329
  }
 
356
  }
357
 
358
  .description-header {
359
+ background: #f8fafc;
360
+ color: #374151;
361
+ padding: 16px 20px;
362
+ text-align: left;
363
+ font-size: 16px;
364
+ font-weight: 600;
365
+ border-bottom: 1px solid #e5e7eb;
366
+ cursor: pointer;
367
+ user-select: none;
368
+ transition: background-color 0.2s ease;
369
  }
370
 
371
+ .description-header:hover {
372
+ background: #f1f5f9;
 
 
 
 
 
 
373
  }
374
 
375
  /* Evaluation Section */
376
  .evaluation-section {
377
+ background: #ffffff;
378
+ border-bottom: 1px solid #e5e7eb;
379
+ padding: 8px 20px;
380
+ }
381
+
382
+ .evaluations-container {
383
+ display: none;
384
+ }
385
+
386
+ .evaluations-container.expanded {
387
+ display: block;
388
  }
389
 
390
  .evaluation-result {
391
+ background: #f9fafb;
392
+ border: 1px solid #e5e7eb;
393
+ border-radius: 6px;
394
+ padding: 12px 16px;
395
+ margin-bottom: 8px;
 
396
  }
397
 
398
  .eval-label {
399
+ font-size: 11px;
400
+ color: #6b7280;
401
+ margin-bottom: 4px;
402
+ font-weight: 500;
403
  text-transform: uppercase;
404
  letter-spacing: 0.5px;
405
  }
406
 
407
  .winner {
408
+ color: #374151;
409
+ font-weight: 600;
410
+ margin-bottom: 4px;
411
+ font-size: 14px;
412
  }
413
 
414
  .reason {
415
+ color: #374151;
416
  font-weight: 500;
417
  }
418
 
419
  .view-eval-btn {
420
+ background: #ffffff;
421
+ color: #374151;
422
+ border: 1px solid #d1d5db;
423
+ padding: 4px 12px;
424
+ border-radius: 4px;
425
  cursor: pointer;
426
+ margin-top: 8px;
427
+ font-size: 11px;
428
+ font-weight: 500;
429
+ transition: all 0.2s ease;
430
  }
431
 
432
  .view-eval-btn:hover {
433
+ background: #f3f4f6;
434
+ border-color: #9ca3af;
435
  }
436
 
437
  .full-evaluation {
438
+ background: #ffffff;
439
+ border: 1px solid #e5e7eb;
440
+ border-radius: 6px;
441
+ padding: 12px 16px;
442
+ margin-top: 8px;
443
  display: none;
 
444
  }
445
 
446
  .thinking-content {
447
+ max-height: 200px;
448
  overflow-y: auto;
449
+ font-size: 12px;
450
+ line-height: 1.5;
451
  white-space: pre-wrap;
452
  text-align: left;
453
+ color: #374151;
454
+ font-weight: 400;
455
  }
456
 
457
  /* Implementation Panels */
 
471
  }
472
 
473
  .impl-header {
474
+ background: var(--secondary-color);
475
  color: white;
476
  padding: 16px 20px;
477
  font-weight: 700;
 
559
  }
560
 
561
  ::-webkit-scrollbar-thumb {
562
+ background: var(--primary-color);
563
  border-radius: 4px;
564
  }
565
 
566
  ::-webkit-scrollbar-thumb:hover {
567
+ background: var(--secondary-color);
568
  }
569
  </style>
570
  </head>
 
590
  };
591
  }
592
 
593
+ // Also check for model 1/model 2 patterns
594
+ const model1Match = evalText.match(/model\s*1/i);
595
+ const model2Match = evalText.match(/model\s*2/i);
596
+
597
+ if (model1Match) {
598
+ return {
599
+ winner: 'Qwen3-Coder-480B-A35B-Instruct',
600
+ reason: evalText,
601
+ fullEval: evalText
602
+ };
603
+ } else if (model2Match) {
604
+ return {
605
+ winner: 'gpt-oss-120b',
606
+ reason: evalText,
607
+ fullEval: evalText
608
+ };
609
+ }
610
+
611
  return null;
612
  } catch (e) {
613
  console.error('Error parsing evaluation:', e);
 
615
  }
616
  }
617
 
618
+ function createEvaluationSection(evaluation, index, evalType = 'qwen') {
619
+ const winner = evaluation.winner.toLowerCase().includes('qwen') || evaluation.winner.toLowerCase().includes('model 1') ? 'Qwen3-Coder-480B-A35B-Instruct' :
620
+ evaluation.winner.toLowerCase().includes('gpt') || evaluation.winner.toLowerCase().includes('model 2') ? 'gpt-oss-120b' :
621
  evaluation.winner;
622
 
623
+ const judgeLabel = evalType === 'qwen' ? 'Qwen3-Coder judge' :
624
+ evalType === 'gpt' ? 'GPT-OSS judge' :
625
+ 'Kimi judge';
626
+
627
  return `
628
  <div class="evaluation-section">
629
+ <div class="eval-label">${judgeLabel}</div>
630
  <div class="evaluation-result">
631
  <div class="winner">🏆 Winner: ${winner}</div>
632
+ <button class="view-eval-btn" onclick="toggleFullEval('full-eval-${index}-${evalType}')">View Reason</button>
633
  </div>
634
+ <div class="full-evaluation" id="full-eval-${index}-${evalType}">
635
  <div class="thinking-content">${evaluation.reason}</div>
636
  </div>
637
  </div>
638
  `;
639
  }
640
 
641
+ function createMajorityVoteSection(qwenEvaluation, gptEvaluation, kimiEvaluation, index) {
642
+ let qwenVotes = 0;
643
+ let gptOssVotes = 0;
644
+ let totalVotes = 0;
645
+
646
+ if (qwenEvaluation) {
647
+ totalVotes++;
648
+ const winner = qwenEvaluation.winner.toLowerCase();
649
+ if (winner.includes('qwen') || winner.includes('model 1')) {
650
+ qwenVotes++;
651
+ } else if (winner.includes('gpt') || winner.includes('model 2')) {
652
+ gptOssVotes++;
653
+ }
654
+ }
655
+
656
+ if (gptEvaluation) {
657
+ totalVotes++;
658
+ const winner = gptEvaluation.winner.toLowerCase();
659
+ if (winner.includes('qwen') || winner.includes('model 1')) {
660
+ qwenVotes++;
661
+ } else if (winner.includes('gpt') || winner.includes('model 2')) {
662
+ gptOssVotes++;
663
+ }
664
+ }
665
+
666
+ if (kimiEvaluation) {
667
+ totalVotes++;
668
+ const winner = kimiEvaluation.winner.toLowerCase();
669
+ if (winner.includes('qwen') || winner.includes('model 1')) {
670
+ qwenVotes++;
671
+ } else if (winner.includes('gpt') || winner.includes('model 2')) {
672
+ gptOssVotes++;
673
+ }
674
+ }
675
+
676
+ if (totalVotes === 0) return '';
677
+
678
+ let majorityWinner = '';
679
+ let voteBreakdown = '';
680
+
681
+ if (qwenVotes > gptOssVotes) {
682
+ majorityWinner = 'Qwen3-Coder-480B-A35B-Instruct';
683
+ voteBreakdown = `Qwen: ${qwenVotes} | GPT-OSS: ${gptOssVotes}`;
684
+ } else if (gptOssVotes > qwenVotes) {
685
+ majorityWinner = 'gpt-oss-120b';
686
+ voteBreakdown = `Qwen: ${qwenVotes} | GPT-OSS: ${gptOssVotes}`;
687
+ } else {
688
+ majorityWinner = 'Tie';
689
+ voteBreakdown = `Qwen: ${qwenVotes} | GPT-OSS: ${gptOssVotes}`;
690
+ }
691
+
692
+ return `
693
+ <div class="evaluation-section">
694
+ <div class="eval-label">Majority Vote (${totalVotes} judges)</div>
695
+ <div class="evaluation-result" style="background: #f0f9ff; border-color: #0ea5e9;">
696
+ <div class="winner" style="color: #0c4a6e;">🏆 Final Winner: ${majorityWinner}</div>
697
+ <div style="font-size: 11px; color: #0c4a6e; margin-top: 4px; font-weight: 500;">Vote Breakdown: ${voteBreakdown}</div>
698
+ </div>
699
+ </div>
700
+ `;
701
+ }
702
+
703
  function calculateWinRates(rows) {
704
+ let qwenMajorityWins = 0;
705
+ let gptOssMajorityWins = 0;
706
  let ties = 0;
707
  let totalEvaluated = 0;
708
 
709
+ // Individual judge stats for display
710
+ let qwenWinsQwenJudge = 0;
711
+ let gptOssWinsQwenJudge = 0;
712
+ let qwenWinsGptJudge = 0;
713
+ let gptOssWinsGptJudge = 0;
714
+ let qwenWinsKimiJudge = 0;
715
+ let gptOssWinsKimiJudge = 0;
716
+ let totalEvaluatedQwen = 0;
717
+ let totalEvaluatedGpt = 0;
718
+ let totalEvaluatedKimi = 0;
719
+
720
  rows.forEach(row => {
721
+ let qwenVotes = 0;
722
+ let gptOssVotes = 0;
723
+ let totalVotes = 0;
724
+
725
+ // Qwen judge evaluations
726
+ const qwenEvaluation = parseEvaluation(row.row['eval-qwen-coder'] || '');
727
+ if (qwenEvaluation) {
728
+ totalEvaluatedQwen++;
729
+ totalVotes++;
730
+ const winner = qwenEvaluation.winner.toLowerCase();
731
+ if (winner.includes('qwen') || winner.includes('model 1')) {
732
+ qwenWinsQwenJudge++;
733
+ qwenVotes++;
734
+ } else if (winner.includes('gpt') || winner.includes('model 2')) {
735
+ gptOssWinsQwenJudge++;
736
+ gptOssVotes++;
737
+ }
738
+ }
739
+
740
+ // GPT-OSS judge evaluations
741
+ const gptEvaluation = parseEvaluation(row.row['eval-gpt-oss'] || '');
742
+ if (gptEvaluation) {
743
+ totalEvaluatedGpt++;
744
+ totalVotes++;
745
+ const winner = gptEvaluation.winner.toLowerCase();
746
+ if (winner.includes('qwen') || winner.includes('model 1')) {
747
+ qwenWinsGptJudge++;
748
+ qwenVotes++;
749
+ } else if (winner.includes('gpt') || winner.includes('model 2')) {
750
+ gptOssWinsGptJudge++;
751
+ gptOssVotes++;
752
+ }
753
+ }
754
+
755
+ // Kimi judge evaluations
756
+ const kimiEvaluation = parseEvaluation(row.row['eval-kimi'] || '');
757
+ if (kimiEvaluation) {
758
+ totalEvaluatedKimi++;
759
+ totalVotes++;
760
+ const winner = kimiEvaluation.winner.toLowerCase();
761
+ if (winner.includes('qwen') || winner.includes('model 1')) {
762
+ qwenWinsKimiJudge++;
763
+ qwenVotes++;
764
+ } else if (winner.includes('gpt') || winner.includes('model 2')) {
765
+ gptOssWinsKimiJudge++;
766
+ gptOssVotes++;
767
+ }
768
+ }
769
+
770
+ // Calculate majority vote for this app
771
+ if (totalVotes > 0) {
772
  totalEvaluated++;
773
+ if (qwenVotes > gptOssVotes) {
774
+ qwenMajorityWins++;
775
+ } else if (gptOssVotes > qwenVotes) {
776
+ gptOssMajorityWins++;
 
777
  } else {
778
  ties++;
779
  }
780
  }
781
  });
782
 
783
+ const qwenRateQwenJudge = totalEvaluatedQwen > 0 ? Math.round((qwenWinsQwenJudge / totalEvaluatedQwen) * 100) : 0;
784
+ const gptOssRateQwenJudge = totalEvaluatedQwen > 0 ? Math.round((gptOssWinsQwenJudge / totalEvaluatedQwen) * 100) : 0;
785
+ const qwenRateGptJudge = totalEvaluatedGpt > 0 ? Math.round((qwenWinsGptJudge / totalEvaluatedGpt) * 100) : 0;
786
+ const gptOssRateGptJudge = totalEvaluatedGpt > 0 ? Math.round((gptOssWinsGptJudge / totalEvaluatedGpt) * 100) : 0;
787
+ const qwenRateKimiJudge = totalEvaluatedKimi > 0 ? Math.round((qwenWinsKimiJudge / totalEvaluatedKimi) * 100) : 0;
788
+ const gptOssRateKimiJudge = totalEvaluatedKimi > 0 ? Math.round((gptOssWinsKimiJudge / totalEvaluatedKimi) * 100) : 0;
789
+
790
+ const qwenMajorityRate = totalEvaluated > 0 ? Math.round((qwenMajorityWins / totalEvaluated) * 100) : 0;
791
+ const gptOssMajorityRate = totalEvaluated > 0 ? Math.round((gptOssMajorityWins / totalEvaluated) * 100) : 0;
792
 
793
  return {
794
+ qwenMajorityWins: qwenMajorityWins,
795
+ gptOssMajorityWins: gptOssMajorityWins,
796
  ties: ties,
797
+ qwenMajorityRate: qwenMajorityRate,
798
+ gptOssMajorityRate: gptOssMajorityRate,
799
+ qwenWinsQwenJudge: qwenWinsQwenJudge,
800
+ gptOssWinsQwenJudge: gptOssWinsQwenJudge,
801
+ qwenWinsGptJudge: qwenWinsGptJudge,
802
+ gptOssWinsGptJudge: gptOssWinsGptJudge,
803
+ qwenWinsKimiJudge: qwenWinsKimiJudge,
804
+ gptOssWinsKimiJudge: gptOssWinsKimiJudge,
805
+ qwenRateQwenJudge: qwenRateQwenJudge,
806
+ gptOssRateQwenJudge: gptOssRateQwenJudge,
807
+ qwenRateGptJudge: qwenRateGptJudge,
808
+ gptOssRateGptJudge: gptOssRateGptJudge,
809
+ qwenRateKimiJudge: qwenRateKimiJudge,
810
+ gptOssRateKimiJudge: gptOssRateKimiJudge,
811
+ totalEvaluated: totalEvaluated,
812
+ totalEvaluatedQwen: totalEvaluatedQwen,
813
+ totalEvaluatedGpt: totalEvaluatedGpt,
814
+ totalEvaluatedKimi: totalEvaluatedKimi
815
  };
816
  }
817
 
818
+ function toggleFullEval(id) {
819
+ const fullEval = document.getElementById(id);
820
 
821
  if (fullEval.style.display === 'block') {
822
  fullEval.style.display = 'none';
 
825
  }
826
  }
827
 
828
+ function toggleEvaluations(index) {
829
+ const evaluationsContainer = document.getElementById(`evaluations-${index}`);
830
+ const header = document.getElementById(`header-${index}`);
831
+
832
+ if (evaluationsContainer.classList.contains('expanded')) {
833
+ evaluationsContainer.classList.remove('expanded');
834
+ header.innerHTML = `${index + 1}. ${header.getAttribute('data-description')} <span style="float: right; font-size: 12px; color: #6b7280; font-weight: 400;">View Details ▼</span>`;
835
+ } else {
836
+ evaluationsContainer.classList.add('expanded');
837
+ header.innerHTML = `${index + 1}. ${header.getAttribute('data-description')} <span style="float: right; font-size: 12px; color: #6b7280; font-weight: 400;">Hide Details ▲</span>`;
838
+ }
839
+ }
840
+
841
+ function filterByWinner(winner) {
842
+ const filterBtns = document.querySelectorAll('.filter-btn');
843
+ filterBtns.forEach(btn => btn.classList.remove('active'));
844
+
845
+ if (winner !== 'all') {
846
+ document.querySelector(`[data-filter="${winner}"]`).classList.add('active');
847
+ } else {
848
+ document.querySelector('[data-filter="all"]').classList.add('active');
849
+ }
850
+
851
+ const appSections = document.querySelectorAll('.app-section');
852
+ appSections.forEach(section => {
853
+ const winnerDisplay = section.querySelector('.winner-display');
854
+ if (winnerDisplay) {
855
+ const winnerText = winnerDisplay.textContent.toLowerCase();
856
+ if (winner === 'all' ||
857
+ (winner === 'qwen' && winnerText.includes('qwen3-coder')) ||
858
+ (winner === 'gpt' && winnerText.includes('gpt-oss')) ||
859
+ (winner === 'tie' && winnerText.includes('tie'))) {
860
+ section.style.display = 'block';
861
+ } else {
862
+ section.style.display = 'none';
863
+ }
864
+ }
865
+ });
866
+ }
867
+
868
  function scrollToApp(index) {
869
  const appSection = document.getElementById(`app-${index}`);
870
  if (appSection) {
 
891
  async function loadAppsFromHuggingFace() {
892
  const container = document.getElementById('apps-container');
893
 
894
+ const response = await fetch('https://datasets-server.huggingface.co/rows?dataset=dvilasuero/jsvibes-qwen-gpt-oss-judged&config=default&split=train&offset=0&length=50');
895
  const data = await response.json();
896
 
897
  // Calculate win rates
 
902
  <h1>JSVibes</h1>
903
  <p>Vibe testing open models for simple but useful (web) code tasks</p>
904
  <div class="aisheets-credit">
905
+ Built with <a href="https://huggingface.co/spaces/aisheets/sheets" target="_blank">AISheets</a> |
906
+ Dataset: <a href="https://huggingface.co/datasets/dvilasuero/jsvibes-qwen-gptoss" target="_blank">jsvibes-qwen-gptoss</a>
907
  </div>
908
  </div>
909
  <div class="stats-header">
910
+ <p style="font-size: 14px; opacity: 0.8;">Automatically evaluated by Qwen3-Coder, GPT-OSS, and Kimi as judges. Results based on majority voting. Judgments are imperfect, test them yourself!</p>
911
  <div class="win-stats">
912
  <div class="stat">
913
+ <span class="model">Qwen3-Coder-480B-A35B-Instruct</span>
914
+ <span class="wins">${winStats.qwenMajorityWins} wins</span>
915
+ <div style="font-size: 14px; opacity: 0.8;">Majority: ${winStats.qwenMajorityRate}% | Individual: Qwen: ${winStats.qwenRateQwenJudge}% | GPT: ${winStats.qwenRateGptJudge}% | Kimi: ${winStats.qwenRateKimiJudge}%</div>
916
  </div>
917
  <div class="stat">
918
+ <span class="model">gpt-oss-120b</span>
919
+ <span class="wins">${winStats.gptOssMajorityWins} wins</span>
920
+ <div style="font-size: 14px; opacity: 0.8;">Majority: ${winStats.gptOssMajorityRate}% | Individual: Qwen: ${winStats.gptOssRateQwenJudge}% | GPT: ${winStats.gptOssRateGptJudge}% | Kimi: ${winStats.gptOssRateKimiJudge}%</div>
921
  </div>
922
  <div class="stat">
923
  <span class="model">Ties</span>
924
  <span class="wins">${winStats.ties}</span>
925
+ <div style="font-size: 14px; opacity: 0.8;">Total: ${winStats.totalEvaluated}</div>
926
  </div>
927
  </div>
928
  </div>
929
+ <div class="filter-section">
930
+ <div class="filter-controls">
931
+ <span class="filter-label">Filter by Winner:</span>
932
+ <button class="filter-btn active" data-filter="all" onclick="filterByWinner('all')">All Apps</button>
933
+ <button class="filter-btn" data-filter="qwen" onclick="filterByWinner('qwen')">Qwen Wins</button>
934
+ <button class="filter-btn" data-filter="gpt" onclick="filterByWinner('gpt')">GPT-OSS Wins</button>
935
+ <button class="filter-btn" data-filter="tie" onclick="filterByWinner('tie')">Ties</button>
936
+ </div>
937
+ </div>
938
  <div class="toc-container">
939
  <div class="toc-title" id="toc-title" onclick="toggleTOC()">📋 List of Apps ▶</div>
940
  <div class="toc-content collapsed" id="toc-content">
 
965
  const app = row.row;
966
 
967
  // Clean HTML content by removing markdown code blocks
 
968
  let qwenHtml = app['qwen3-coder'] || '';
969
+ let gptOssHtml = app['gpt-oss'] || '';
970
 
 
 
 
971
  if (qwenHtml.startsWith('```html')) {
972
  qwenHtml = qwenHtml.replace(/```html\n?/, '').replace(/```$/, '');
973
  }
974
+ if (gptOssHtml.startsWith('```html')) {
975
+ gptOssHtml = gptOssHtml.replace(/```html\n?/, '').replace(/```$/, '');
976
+ }
977
+
978
+ // Parse evaluation data from all three judges
979
+ const qwenEvaluation = parseEvaluation(app['eval-qwen-coder'] || '');
980
+ const gptEvaluation = parseEvaluation(app['eval-gpt-oss'] || '');
981
+ const kimiEvaluation = parseEvaluation(app['eval-kimi'] || '');
982
 
983
+ console.log(`App ${index} evaluations:`, {
984
+ qwen: qwenEvaluation,
985
+ gpt: gptEvaluation,
986
+ kimi: kimiEvaluation,
987
+ qwenRaw: app['eval-qwen-coder'],
988
+ gptRaw: app['eval-gpt-oss'],
989
+ kimiRaw: app['eval-kimi']
990
+ });
991
+
992
+ if (!qwenEvaluation && app['eval-qwen-coder']) {
993
+ console.log(`Failed to parse Qwen evaluation for app ${index}:`, app['eval-qwen-coder']);
994
+ }
995
+ if (!gptEvaluation && app['eval-gpt-oss']) {
996
+ console.log(`Failed to parse GPT evaluation for app ${index}:`, app['eval-gpt-oss']);
997
+ }
998
+ if (!kimiEvaluation && app['eval-kimi']) {
999
+ console.log(`Failed to parse Kimi evaluation for app ${index}:`, app['eval-kimi']);
1000
  }
1001
 
1002
  const section = document.createElement('div');
1003
  section.className = 'app-section';
1004
  section.id = `app-${index}`;
1005
+ // Calculate majority vote for this app
1006
+ let qwenVotes = 0;
1007
+ let gptOssVotes = 0;
1008
+ let totalVotes = 0;
1009
+
1010
+ if (qwenEvaluation) {
1011
+ totalVotes++;
1012
+ const winner = qwenEvaluation.winner.toLowerCase();
1013
+ if (winner.includes('qwen') || winner.includes('model 1')) {
1014
+ qwenVotes++;
1015
+ } else if (winner.includes('gpt') || winner.includes('model 2')) {
1016
+ gptOssVotes++;
1017
+ }
1018
+ }
1019
+
1020
+ if (gptEvaluation) {
1021
+ totalVotes++;
1022
+ const winner = gptEvaluation.winner.toLowerCase();
1023
+ if (winner.includes('qwen') || winner.includes('model 1')) {
1024
+ qwenVotes++;
1025
+ } else if (winner.includes('gpt') || winner.includes('model 2')) {
1026
+ gptOssVotes++;
1027
+ }
1028
+ }
1029
+
1030
+ if (kimiEvaluation) {
1031
+ totalVotes++;
1032
+ const winner = kimiEvaluation.winner.toLowerCase();
1033
+ if (winner.includes('qwen') || winner.includes('model 1')) {
1034
+ qwenVotes++;
1035
+ } else if (winner.includes('gpt') || winner.includes('model 2')) {
1036
+ gptOssVotes++;
1037
+ }
1038
+ }
1039
+
1040
+ let majorityWinner = '';
1041
+ let voteBreakdown = '';
1042
+
1043
+ if (qwenVotes > gptOssVotes) {
1044
+ majorityWinner = 'Qwen3-Coder-480B-A35B-Instruct';
1045
+ voteBreakdown = `Qwen: ${qwenVotes} | GPT-OSS: ${gptOssVotes}`;
1046
+ } else if (gptOssVotes > qwenVotes) {
1047
+ majorityWinner = 'gpt-oss-120b';
1048
+ voteBreakdown = `Qwen: ${qwenVotes} | GPT-OSS: ${gptOssVotes}`;
1049
+ } else {
1050
+ majorityWinner = 'Tie';
1051
+ voteBreakdown = `Qwen: ${qwenVotes} | GPT-OSS: ${gptOssVotes}`;
1052
+ }
1053
+
1054
  section.innerHTML = `
1055
+ <div class="description-header" id="header-${index}" onclick="toggleEvaluations(${index})" data-description="${app.description || 'No description available'}">
1056
+ ${index + 1}. ${app.description || 'No description available'} <span style="float: right; font-size: 12px; color: #6b7280; font-weight: 400;">View Details ▼</span>
1057
+ </div>
1058
+ <div class="winner-display" style="background: #f0f9ff; border: 1px solid #0ea5e9; padding: 8px 20px; margin: 0;">
1059
+ <div style="color: #0c4a6e; font-weight: 600; font-size: 14px;">🏆 Winner: ${majorityWinner}</div>
1060
+ <div style="color: #0c4a6e; font-size: 11px; margin-top: 2px;">Vote Breakdown: ${voteBreakdown} (${totalVotes} judges)</div>
1061
+ </div>
1062
+ <div class="evaluations-container" id="evaluations-${index}">
1063
+ ${qwenEvaluation ? createEvaluationSection(qwenEvaluation, index, 'qwen') : ''}
1064
+ ${gptEvaluation ? createEvaluationSection(gptEvaluation, index, 'gpt') : ''}
1065
+ ${kimiEvaluation ? createEvaluationSection(kimiEvaluation, index, 'kimi') : ''}
1066
  </div>
 
1067
  <div class="implementations">
1068
  <div class="impl-panel">
1069
+ <div class="impl-header">Qwen3-Coder-480B-A35B-Instruct</div>
1070
  <div class="iframe-container">
1071
+ <iframe srcdoc="${qwenHtml.replace(/"/g, '&quot;')}"></iframe>
1072
  </div>
1073
  </div>
1074
  <div class="impl-panel">
1075
+ <div class="impl-header">gpt-oss-120b</div>
1076
  <div class="iframe-container">
1077
+ <iframe srcdoc="${gptOssHtml.replace(/"/g, '&quot;')}"></iframe>
1078
  </div>
1079
  </div>
1080
  </div>