thinkwee commited on
Commit
9337e18
·
1 Parent(s): e12cf59

feat: Add new Qwen and Gemini model data, implement entropy data processing, and introduce various visualization and data management scripts.

Browse files
Files changed (5) hide show
  1. .gitignore +3 -0
  2. charts.js +777 -248
  3. data.js +0 -0
  4. index.html +79 -12
  5. styles.css +178 -181
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ probing/
2
+ # reference_scripts/
3
+ novelty_check/
charts.js CHANGED
@@ -2,73 +2,148 @@
2
  // Using Plotly.js with animate for smooth transitions
3
 
4
  // Common Plotly layout settings for dark theme
 
5
  const darkLayout = {
6
- paper_bgcolor: 'rgba(30, 41, 59, 0)',
7
- plot_bgcolor: 'rgba(30, 41, 59, 0)',
8
  font: {
9
- family: 'Inter, sans-serif',
10
- color: '#e2e8f0',
11
- size: 11
12
  },
13
  xaxis: {
14
- gridcolor: 'rgba(148, 163, 184, 0.12)',
15
- linecolor: 'rgba(148, 163, 184, 0.2)',
16
- tickfont: { color: '#94a3b8', size: 10 },
17
- title: { font: { color: '#e2e8f0', size: 11 } }
 
18
  },
19
  yaxis: {
20
- gridcolor: 'rgba(148, 163, 184, 0.12)',
21
- linecolor: 'rgba(148, 163, 184, 0.2)',
22
- tickfont: { color: '#94a3b8', size: 10 },
23
- title: { font: { color: '#e2e8f0', size: 11 } }
 
24
  },
25
  legend: {
26
- bgcolor: 'rgba(30, 41, 59, 0.9)',
27
- bordercolor: 'rgba(148, 163, 184, 0.2)',
28
- borderwidth: 1,
29
- font: { color: '#e2e8f0', size: 10 },
30
  orientation: 'h',
31
- y: -0.2,
32
  x: 0.5,
33
- xanchor: 'center'
 
34
  },
35
  hoverlabel: {
36
- bgcolor: '#1e293b',
37
- bordercolor: '#6366f1',
38
- font: { color: '#e2e8f0', size: 11 }
 
39
  },
40
- margin: { t: 20, r: 15, b: 60, l: 50 }
 
41
  };
42
 
43
  const plotlyConfig = {
44
- displayModeBar: true,
45
  responsive: true,
46
- modeBarButtonsToRemove: ['lasso2d', 'select2d', 'autoScale2d'],
47
  displaylogo: false
48
  };
49
 
50
  // Animation settings for smooth transitions
51
  const animationSettings = {
52
  transition: {
53
- duration: 500,
54
  easing: 'cubic-in-out'
55
  },
56
  frame: {
57
- duration: 500
 
58
  }
59
  };
60
 
61
  // Current state
62
  let currentScalingDim = 'turn';
63
- let currentProbingMode = 'byTurn';
 
64
 
65
  // ============================================================================
66
- // SCALING ANALYSIS - 3 Charts with animated dimension switching
67
  // ============================================================================
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
  // ============================================================================
71
- // SCALING ANALYSIS - Normalized Coordinate System for Smooth Animation
72
  // ============================================================================
73
 
74
  // Helper to normalize values to [0, 1]
@@ -135,7 +210,28 @@ const SCALING_Y_RANGES = {
135
  'globem': [0, 50] // Python: y_min=0, y_max=50
136
  };
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  function initScalingCharts() {
 
 
 
 
 
 
 
139
  const scenarios = ['mimic', '10k', 'globem'];
140
 
141
  scenarios.forEach(scenario => {
@@ -161,7 +257,7 @@ function initScalingCharts() {
161
  x: modelNormX,
162
  y: data[model].accuracy,
163
  mode: 'lines+markers',
164
- name: model,
165
  line: { color: DDR_DATA.modelColors[model] || '#888', width: 2 },
166
  marker: { size: 6, color: DDR_DATA.modelColors[model] || '#888' },
167
  hovertemplate: `<b>${model}</b><br>Turn: %{customdata}<br>Accuracy: %{y:.2f}%<extra></extra>`,
@@ -175,7 +271,7 @@ function initScalingCharts() {
175
  ...darkLayout,
176
  xaxis: {
177
  ...darkLayout.xaxis,
178
- title: { text: 'Number of Interaction Turns', font: { size: 11, color: '#e2e8f0' } },
179
  type: 'linear', // ALWAYS LINEAR
180
  range: [-0.05, 1.05], // FIXED RANGE
181
  tickmode: 'array',
@@ -185,26 +281,26 @@ function initScalingCharts() {
185
  },
186
  yaxis: {
187
  ...darkLayout.yaxis,
188
- title: { text: 'Accuracy (%)', font: { size: 11, color: '#e2e8f0' } },
189
  dtick: 5,
190
  range: yRange
191
  },
192
- showlegend: true
193
  };
194
 
195
  Plotly.newPlot(`scaling-${scenario}`, traces, layout, plotlyConfig);
196
  });
197
- }
198
 
199
-
200
- // Inject CSS for line drawing animation
201
- const style = document.createElement('style');
202
- style.textContent = `
203
- .js-line path {
204
- transition: stroke-dashoffset 1s ease-out;
205
  }
206
- `;
207
- document.head.appendChild(style);
 
 
208
 
209
  function updateScalingCharts(dimension) {
210
  const scenarios = ['mimic', '10k', 'globem'];
@@ -240,7 +336,6 @@ function updateScalingCharts(dimension) {
240
  let offset = 0;
241
 
242
  const hoverLabels = { 'turn': 'Turns', 'token': 'Tokens', 'cost': 'Cost' };
243
- const hoverFormat = dimension === 'token' ? (v) => v.toLocaleString() : (dimension === 'cost' ? (v) => '$' + v.toFixed(4) : (v) => v);
244
 
245
  models.forEach((model, i) => {
246
  const len = data[model].turns.length;
@@ -259,13 +354,13 @@ function updateScalingCharts(dimension) {
259
  x: modelNormX,
260
  y: data[model].accuracy,
261
  customdata: rawValues,
262
- mode: 'lines+markers', // KEEP LINES - we'll hide them via CSS
 
263
  hovertemplate: `<b>${model}</b><br>${hoverLabels[dimension]}: %{customdata}<br>Accuracy: %{y:.2f}%<extra></extra>`
264
  });
265
  });
266
 
267
- // 4. Two-Phase Animation: Points Only -> Add Lines with Drawing Effect
268
-
269
  const graphDiv = document.getElementById(`scaling-${scenario}`);
270
 
271
  // Phase 1: Update to markers-only mode and animate points
@@ -295,48 +390,55 @@ function updateScalingCharts(dimension) {
295
  redraw: true
296
  }
297
  }).then(() => {
298
- // Phase 2: Add lines back and animate them drawing
 
299
  const linesAndMarkersTraces = newTraces.map(trace => ({
300
  ...trace,
301
- mode: 'lines+markers'
 
 
 
 
 
302
  }));
303
 
304
- // Use Plotly.react and wait for it to complete
305
  Plotly.react(`scaling-${scenario}`, linesAndMarkersTraces, {
306
  ...graphDiv.layout
307
  }, plotlyConfig).then(() => {
308
- // Give browser time to render
309
- requestAnimationFrame(() => {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
  requestAnimationFrame(() => {
311
- // Try multiple selectors to find the line paths
312
- let paths = graphDiv.querySelectorAll('.scatterlayer .js-line path');
313
- if (paths.length === 0) {
314
- paths = graphDiv.querySelectorAll('.js-line path');
315
- }
316
- if (paths.length === 0) {
317
- paths = graphDiv.querySelectorAll('path.js-line');
318
- }
319
- if (paths.length === 0) {
320
- paths = graphDiv.querySelectorAll('.scatter path');
321
- }
322
-
323
- paths.forEach((path, idx) => {
324
  const len = path.getTotalLength();
325
-
326
  if (len > 0) {
327
- // Reset any previous animation
328
- path.style.transition = 'none';
329
- path.style.strokeDasharray = len + ' ' + len;
330
- path.style.strokeDashoffset = len;
331
-
332
- // Force reflow
333
- path.getBoundingClientRect();
334
-
335
- // Start animation after a tiny delay
336
- setTimeout(() => {
337
- path.style.transition = 'stroke-dashoffset 0.8s ease-out';
338
- path.style.strokeDashoffset = '0';
339
- }, 10);
340
  }
341
  });
342
  });
@@ -346,18 +448,19 @@ function updateScalingCharts(dimension) {
346
  });
347
  }
348
 
349
-
350
-
351
-
352
- // Dimension toggle event listeners
353
- document.querySelectorAll('.dim-btn:not(.probing-dim)').forEach(btn => {
354
- btn.addEventListener('click', () => {
355
- document.querySelectorAll('.dim-btn:not(.probing-dim)').forEach(b => b.classList.remove('active'));
356
- btn.classList.add('active');
357
-
358
- const dimension = btn.dataset.dim;
359
- currentScalingDim = dimension;
360
- updateScalingCharts(dimension);
 
361
  });
362
  });
363
 
@@ -397,8 +500,6 @@ function getDisplayName(model) {
397
  return RANKING_DISPLAY_NAMES[model] || model;
398
  }
399
 
400
- let currentRankingMode = 'novelty';
401
-
402
  function renderRankingCharts(mode, animate = false) {
403
  const scenarios = [
404
  { key: 'MIMIC', id: 'mimic' },
@@ -410,69 +511,116 @@ function renderRankingCharts(mode, animate = false) {
410
  const rawData = DDR_DATA.ranking[key];
411
  if (!rawData) return;
412
 
413
- // Sort by primary ranking based on mode
414
- let sortedModels;
 
 
 
 
 
 
415
  if (mode === 'novelty') {
416
- sortedModels = [...rawData].sort((a, b) => a.bt_rank - b.bt_rank);
 
417
  } else {
418
- sortedModels = [...rawData].sort((a, b) => a.acc_rank - b.acc_rank);
 
 
 
 
 
 
 
 
 
 
419
  }
420
 
421
- const models = sortedModels;
422
- const topN = models.length;
 
 
 
 
 
 
423
  const traces = [];
424
 
425
- // Connection lines (dashed)
426
- models.forEach((m, i) => {
427
- traces.push({
428
- x: [m.bt_rank, m.acc_rank],
429
- y: [i, i],
430
- mode: 'lines',
431
- line: {
432
- color: 'rgba(148, 163, 184, 0.4)',
433
- width: 1.5,
434
- dash: 'dash'
435
- },
436
- showlegend: false,
437
- hoverinfo: 'skip'
438
- });
439
  });
440
 
441
- // Novelty rank points (filled circles)
442
  traces.push({
443
- x: models.map(m => m.bt_rank),
444
- y: models.map((_, i) => i),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
445
  mode: 'markers',
446
  name: 'Novelty Rank',
447
  marker: {
448
  size: mode === 'novelty' ? 12 : 10,
449
  symbol: 'circle',
450
- color: models.map(m => m.is_proprietary ? PROPRIETARY_COLOR : OPENSOURCE_COLOR),
451
  line: { color: '#fff', width: 1.5 }
452
  },
453
- text: models.map(m => `<b>${getDisplayName(m.model)}</b><br>Novelty: #${m.bt_rank}<br>Win Rate: ${m.win_rate}%`),
454
  hovertemplate: '%{text}<extra></extra>'
455
  });
456
 
457
- // Accuracy rank points (hollow diamonds)
458
  traces.push({
459
- x: models.map(m => m.acc_rank),
460
- y: models.map((_, i) => i),
461
  mode: 'markers',
462
  name: 'Accuracy Rank',
463
  marker: {
464
  size: mode === 'accuracy' ? 12 : 10,
465
  symbol: 'diamond-open',
466
- color: models.map(m => m.is_proprietary ? PROPRIETARY_COLOR : OPENSOURCE_COLOR),
467
  line: { width: 2 }
468
  },
469
- text: models.map(m => `<b>${getDisplayName(m.model)}</b><br>Accuracy: #${m.acc_rank}<br>${m.accuracy}%`),
470
  hovertemplate: '%{text}<extra></extra>'
471
  });
472
 
473
- // Calculate correlation
474
- const btRanks = models.map(m => m.bt_rank);
475
- const accRanks = models.map(m => m.acc_rank);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
476
  const n = btRanks.length;
477
  const meanBt = btRanks.reduce((a, b) => a + b, 0) / n;
478
  const meanAcc = accRanks.reduce((a, b) => a + b, 0) / n;
@@ -490,19 +638,18 @@ function renderRankingCharts(mode, animate = false) {
490
  ...darkLayout,
491
  xaxis: {
492
  ...darkLayout.xaxis,
493
- title: { text: 'Rank', font: { size: 10, color: '#e2e8f0' } },
494
- range: [topN + 0.5, 0.5],
495
- dtick: 2,
496
- tick0: 2
 
497
  },
498
  yaxis: {
499
  ...darkLayout.yaxis,
500
- tickmode: 'array',
501
- tickvals: models.map((_, i) => i),
502
- ticktext: models.map(m => getDisplayName(m.model)),
503
- tickfont: { size: 9, color: '#94a3b8' },
504
- automargin: true,
505
- range: [-0.5, models.length - 0.5]
506
  },
507
  showlegend: false,
508
  annotations: [
@@ -513,8 +660,8 @@ function renderRankingCharts(mode, animate = false) {
513
  yref: 'paper',
514
  text: `ρ = ${rho.toFixed(2)}`,
515
  showarrow: false,
516
- font: { size: 11, color: '#94a3b8', family: 'Inter' },
517
- bgcolor: 'rgba(30, 41, 59, 0.8)',
518
  borderpad: 4
519
  },
520
  {
@@ -524,16 +671,21 @@ function renderRankingCharts(mode, animate = false) {
524
  yref: 'paper',
525
  text: sortLabel,
526
  showarrow: false,
527
- font: { size: 10, color: mode === 'novelty' ? PROPRIETARY_COLOR : OPENSOURCE_COLOR, family: 'Inter' },
528
- bgcolor: 'rgba(30, 41, 59, 0.8)',
529
  borderpad: 4
530
  }
531
  ],
532
- margin: { t: 15, r: 15, b: 40, l: 120 }
 
 
533
  };
534
 
535
  if (animate) {
536
- Plotly.react(`ranking-${id}`, traces, layout, plotlyConfig);
 
 
 
537
  } else {
538
  Plotly.newPlot(`ranking-${id}`, traces, layout, plotlyConfig);
539
  }
@@ -541,27 +693,35 @@ function renderRankingCharts(mode, animate = false) {
541
  }
542
 
543
  function initRankingCharts() {
 
 
 
 
 
544
  renderRankingCharts('novelty', false);
545
  }
546
 
547
  // Ranking mode toggle event listener
548
- document.querySelectorAll('.ranking-dim').forEach(btn => {
549
- btn.addEventListener('click', () => {
550
- const mode = btn.dataset.mode;
551
- if (mode === currentRankingMode) return;
552
-
553
- document.querySelectorAll('.ranking-dim').forEach(b => b.classList.remove('active'));
554
- btn.classList.add('active');
555
-
556
- currentRankingMode = mode;
557
- renderRankingCharts(mode, true);
 
 
 
 
558
  });
559
  });
560
 
561
  // ============================================================================
562
  // TURN DISTRIBUTION - 3 Charts (Ridgeline style)
563
  // ============================================================================
564
- // Turn distribution display name mapping
565
  const TURN_DISPLAY_NAMES = {
566
  'run_api_deepseek_deepseek-chat': 'DeepSeek-V3.2',
567
  'qwen3-next-80b-a3b-instruct': 'Qwen3-Next-80BA3B',
@@ -598,17 +758,23 @@ function getTurnDisplayName(model) {
598
  }
599
 
600
  function initTurnCharts() {
 
 
 
 
 
 
601
  const scenarios = ['mimic', '10k', 'globem'];
602
 
603
- // Family colors
604
  const familyColors = {
605
- 'claude': '#FF6D00',
606
- 'gpt': '#00C853',
607
- 'gemini': '#2196F3',
608
- 'deepseek': '#E91E63',
609
- 'glm': '#9C27B0',
610
- 'kimi': '#FFA500',
611
- 'minimax': '#20B2AA',
612
  'qwen': '#0EA5E9',
613
  'llama': '#F59E0B'
614
  };
@@ -618,56 +784,65 @@ function initTurnCharts() {
618
  for (const [family, color] of Object.entries(familyColors)) {
619
  if (lower.includes(family)) return color;
620
  }
621
- return '#888';
622
  }
623
 
624
  scenarios.forEach(scenario => {
625
  const data = DDR_DATA.turn[scenario];
626
  if (!data) return;
627
 
628
- // Sort by median descending (highest median at top)
629
  const sortedData = [...data].sort((a, b) => b.median - a.median);
630
 
631
- // Limit to top 15 models for readability
632
- const displayData = sortedData.slice(0, 15);
633
 
634
  const traces = [];
635
- const binLabels = ['0-10', '10-20', '20-30', '30-40', '40-50', '50-60', '60-70', '70-80', '80-90', '90-100'];
636
  const binCenters = [5, 15, 25, 35, 45, 55, 65, 75, 85, 95];
637
 
638
- // Create ridgeline traces (area charts stacked vertically)
639
  displayData.forEach((model, idx) => {
640
  const color = getModelColor(model.model);
641
  const yOffset = idx;
642
  const displayName = getTurnDisplayName(model.model);
643
-
644
- // Scale distribution to fit in the row (max height ~0.8)
645
  const maxDist = Math.max(...model.distribution) || 1;
646
- const scaledDist = model.distribution.map(d => d / maxDist * 0.7);
647
 
648
- // Create filled area trace
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
649
  traces.push({
650
- x: binCenters,
651
- y: scaledDist.map(d => yOffset + d),
652
  mode: 'lines',
 
 
 
 
 
 
653
  fill: 'toself',
654
- fillcolor: color + '40', // 25% opacity
655
- line: { color: color, width: 1.5 },
656
  name: displayName,
657
- text: model.distribution.map((d, i) =>
658
- `${displayName}<br>${binLabels[i]} turns: ${d.toFixed(1)}%<br>Median: ${model.median}`
659
- ),
660
- hovertemplate: '%{text}<extra></extra>',
661
- showlegend: false
662
- });
663
-
664
- // Add baseline
665
- traces.push({
666
- x: [0, 100],
667
- y: [yOffset, yOffset],
668
- mode: 'lines',
669
- line: { color: 'rgba(148, 163, 184, 0.2)', width: 0.5 },
670
- hoverinfo: 'skip',
671
  showlegend: false
672
  });
673
  });
@@ -676,17 +851,19 @@ function initTurnCharts() {
676
  ...darkLayout,
677
  xaxis: {
678
  ...darkLayout.xaxis,
679
- title: { text: 'Number of Turns', font: { size: 11, color: '#e2e8f0' } },
680
  range: [0, 100],
681
  dtick: 20
682
  },
683
  yaxis: {
684
  ...darkLayout.yaxis,
685
  tickmode: 'array',
686
- tickvals: displayData.map((_, i) => i),
687
  ticktext: displayData.map(m => getTurnDisplayName(m.model)),
688
  automargin: true,
689
- range: [-0.5, displayData.length]
 
 
690
  },
691
  margin: { ...darkLayout.margin, l: 140 },
692
  showlegend: false
@@ -700,7 +877,12 @@ function initTurnCharts() {
700
  // PROBING RESULTS - 3 Charts with animated mode switching
701
  // ============================================================================
702
  function initProbingCharts() {
703
- renderProbingCharts('byTurn');
 
 
 
 
 
704
  }
705
 
706
  function renderProbingCharts(mode) {
@@ -708,31 +890,31 @@ function renderProbingCharts(mode) {
708
  const scenarioIds = { 'mimic': 'mimic', 'globem': 'globem', '10k': '10k' };
709
 
710
  scenarios.forEach(scenario => {
711
- const data = DDR_DATA.probing[mode]?.[scenario];
 
712
  if (!data) return;
713
 
714
  const traces = [];
715
- const models = Object.keys(data);
 
 
716
 
717
  models.forEach(model => {
718
  const modelData = data[model];
719
  const xKey = mode === 'byTurn' ? 'turns' : 'progress';
720
  const xLabel = mode === 'byTurn' ? 'Turn' : 'Progress (%)';
721
 
722
- // Main line
723
  traces.push({
724
  x: modelData[xKey],
725
  y: modelData.logprob,
726
- mode: 'lines+markers',
727
  name: model,
728
  line: {
729
- color: DDR_DATA.probingColors[model] || '#888',
730
  width: 2
731
  },
732
- marker: {
733
- size: 4,
734
- color: DDR_DATA.probingColors[model] || '#888'
735
- },
736
  hovertemplate: `<b>${model}</b><br>${xLabel}: %{x}<br>Log Prob: %{y:.2f}<extra></extra>`
737
  });
738
 
@@ -745,7 +927,7 @@ function renderProbingCharts(mode) {
745
  x: [...modelData[xKey], ...modelData[xKey].slice().reverse()],
746
  y: [...upper, ...lower.slice().reverse()],
747
  fill: 'toself',
748
- fillcolor: (DDR_DATA.probingColors[model] || '#888') + '25',
749
  line: { width: 0 },
750
  showlegend: false,
751
  hoverinfo: 'skip'
@@ -753,50 +935,70 @@ function renderProbingCharts(mode) {
753
  }
754
  });
755
 
 
 
 
 
 
 
 
 
 
 
 
756
  const layout = {
757
  ...darkLayout,
758
  xaxis: {
759
  ...darkLayout.xaxis,
760
- title: { text: mode === 'byTurn' ? 'Turn' : 'Interaction Progress (%)', font: { size: 11, color: '#e2e8f0' } }
761
  },
762
  yaxis: {
763
  ...darkLayout.yaxis,
764
- title: { text: 'Avg Log Probability', font: { size: 11, color: '#e2e8f0' } }
765
  },
766
- showlegend: true
767
  };
768
 
769
- Plotly.newPlot(`probing-${scenarioIds[scenario]}`, traces, layout, plotlyConfig);
 
 
 
 
 
 
 
 
 
 
 
 
 
770
  });
771
- }
772
 
773
- // Probing dimension toggle
774
- document.querySelectorAll('.probing-dim').forEach(btn => {
775
- btn.addEventListener('click', () => {
776
- document.querySelectorAll('.probing-dim').forEach(b => b.classList.remove('active'));
777
- btn.classList.add('active');
 
 
778
 
779
- const mode = btn.dataset.mode;
780
- currentProbingMode = mode;
 
781
 
782
- // Add updating class for visual feedback
783
- ['mimic', 'globem', '10k'].forEach(s => {
784
- document.getElementById(`probing-${s}`).classList.add('chart-updating');
785
- });
786
 
787
- setTimeout(() => {
788
- renderProbingCharts(mode);
789
- ['mimic', 'globem', '10k'].forEach(s => {
790
- document.getElementById(`probing-${s}`).classList.remove('chart-updating');
791
- });
792
- }, 150);
793
- });
794
- });
795
 
796
  // ============================================================================
797
  // ERROR ANALYSIS - Hierarchical Bar Chart
798
  // ============================================================================
799
  function initErrorChart() {
 
 
 
 
 
 
800
  const data = DDR_DATA.error;
801
  if (!data || data.length === 0) return;
802
 
@@ -820,7 +1022,7 @@ function initErrorChart() {
820
  },
821
  text: data.map(d => `${d.percentage}%`),
822
  textposition: 'outside',
823
- textfont: { size: 11, color: '#e2e8f0' },
824
  hovertemplate: '<b>%{x}</b><br>%{y:.1f}%<br>Count: %{customdata}<extra></extra>',
825
  customdata: data.map(d => d.count),
826
  showlegend: false
@@ -837,7 +1039,7 @@ function initErrorChart() {
837
  y: maxPct * 1.15,
838
  text: `<b>${catName}</b>`,
839
  showarrow: false,
840
- font: { size: 10, color: '#e2e8f0' },
841
  xanchor: 'center',
842
  yanchor: 'bottom'
843
  });
@@ -848,11 +1050,11 @@ function initErrorChart() {
848
  xaxis: {
849
  ...darkLayout.xaxis,
850
  tickangle: -30,
851
- tickfont: { size: 10, color: '#94a3b8' }
852
  },
853
  yaxis: {
854
  ...darkLayout.yaxis,
855
- title: { text: 'Percentage (%)', font: { size: 11, color: '#e2e8f0' } },
856
  range: [0, maxPct * 1.25]
857
  },
858
  annotations: annotations,
@@ -863,29 +1065,356 @@ function initErrorChart() {
863
  }
864
 
865
  // ============================================================================
866
- // INITIALIZE ALL CHARTS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
867
  // ============================================================================
868
  document.addEventListener('DOMContentLoaded', () => {
869
- initScalingCharts();
870
- initRankingCharts();
871
- initTurnCharts();
872
- initErrorChart();
873
- initProbingCharts();
 
 
 
 
 
 
 
 
874
  });
875
 
876
- // Handle window resize
877
  let resizeTimeout;
878
- window.addEventListener('resize', () => {
879
- clearTimeout(resizeTimeout);
880
- resizeTimeout = setTimeout(() => {
 
 
 
 
 
 
881
  ['mimic', '10k', 'globem'].forEach(s => {
882
- Plotly.Plots.resize(`scaling-${s}`);
883
- Plotly.Plots.resize(`ranking-${s}`);
884
- Plotly.Plots.resize(`turn-${s}`);
885
- Plotly.Plots.resize(`probing-${s}`);
886
  });
887
- if (document.getElementById('error-chart')) {
888
- Plotly.Plots.resize('error-chart');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
889
  }
890
- }, 100);
 
 
 
 
 
 
 
 
 
891
  });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  // Using Plotly.js with animate for smooth transitions
3
 
4
  // Common Plotly layout settings for dark theme
5
+ // Common Plotly layout settings for Apple Minimalist theme
6
  const darkLayout = {
7
+ paper_bgcolor: 'rgba(0,0,0,0)',
8
+ plot_bgcolor: 'rgba(0,0,0,0)',
9
  font: {
10
+ family: '-apple-system, BlinkMacSystemFont, "SF Pro Text", "Helvetica Neue", sans-serif',
11
+ color: '#000000', // Pure black for max contrast
12
+ size: 12 // Base font size increased
13
  },
14
  xaxis: {
15
+ gridcolor: '#d1d1d6', // Darker grid lines
16
+ linecolor: '#d1d1d6',
17
+ tickfont: { color: '#515154', size: 11 }, // Larger and darker ticks
18
+ title: { font: { color: '#000000', size: 12, weight: 600 } },
19
+ zerolinecolor: '#d1d1d6'
20
  },
21
  yaxis: {
22
+ gridcolor: '#d1d1d6',
23
+ linecolor: '#d1d1d6',
24
+ tickfont: { color: '#515154', size: 11 },
25
+ title: { font: { color: '#000000', size: 12, weight: 600 } },
26
+ zerolinecolor: '#d1d1d6'
27
  },
28
  legend: {
29
+ bgcolor: 'rgba(0,0,0,0)',
30
+ bordercolor: 'rgba(0,0,0,0)',
31
+ borderwidth: 0,
32
+ font: { color: '#000000', size: 11 },
33
  orientation: 'h',
34
+ y: 0.99,
35
  x: 0.5,
36
+ xanchor: 'center',
37
+ yanchor: 'top'
38
  },
39
  hoverlabel: {
40
+ bgcolor: '#ffffff',
41
+ bordercolor: 'rgba(0,0,0,0.1)',
42
+ font: { color: '#000000', size: 12 },
43
+ namelength: -1
44
  },
45
+ hovermode: 'closest', // Highlight closest point/element on hover
46
+ margin: { t: 30, r: 20, b: 60, l: 60 } // Increased margins
47
  };
48
 
49
  const plotlyConfig = {
50
+ displayModeBar: false, // Hide modebar completely
51
  responsive: true,
 
52
  displaylogo: false
53
  };
54
 
55
  // Animation settings for smooth transitions
56
  const animationSettings = {
57
  transition: {
58
+ duration: 750,
59
  easing: 'cubic-in-out'
60
  },
61
  frame: {
62
+ duration: 750,
63
+ redraw: true
64
  }
65
  };
66
 
67
  // Current state
68
  let currentScalingDim = 'turn';
69
+ let currentProbingMode = 'byProgress';
70
+ let currentRankingMode = 'novelty';
71
 
72
  // ============================================================================
73
+ // PERFORMANCE OPTIMIZATION UTILITIES
74
  // ============================================================================
75
 
76
+ // Track which charts have been initialized
77
+ const initializedCharts = new Set();
78
+
79
+ // Lazy loading observer - only render charts when they enter viewport
80
+ const lazyLoadObserver = new IntersectionObserver((entries) => {
81
+ entries.forEach(entry => {
82
+ if (entry.isIntersecting) {
83
+ const section = entry.target;
84
+ const sectionId = section.id;
85
+
86
+ if (!initializedCharts.has(sectionId)) {
87
+ initializedCharts.add(sectionId);
88
+
89
+ // Use requestIdleCallback for non-blocking initialization
90
+ const initFn = () => {
91
+ switch (sectionId) {
92
+ case 'scaling': initScalingCharts(); break;
93
+ case 'ranking': initRankingCharts(); break;
94
+ case 'turn': initTurnCharts(); break;
95
+ case 'entropy': initEntropyCharts(); break;
96
+ case 'error': initErrorChart(); break;
97
+ case 'probing': initProbingCharts(); break;
98
+ }
99
+ };
100
+
101
+ if ('requestIdleCallback' in window) {
102
+ requestIdleCallback(initFn, { timeout: 100 });
103
+ } else {
104
+ setTimeout(initFn, 0);
105
+ }
106
+ }
107
+ }
108
+ });
109
+ }, {
110
+ rootMargin: '100px 0px', // Start loading 100px before entering viewport
111
+ threshold: 0.01
112
+ });
113
+
114
+ // Debounce utility for hover effects
115
+ function debounce(fn, delay) {
116
+ let timeoutId;
117
+ return function (...args) {
118
+ clearTimeout(timeoutId);
119
+ timeoutId = setTimeout(() => fn.apply(this, args), delay);
120
+ };
121
+ }
122
+
123
+ // Throttle utility for frequent events
124
+ function throttle(fn, limit) {
125
+ let inThrottle = false;
126
+ return function (...args) {
127
+ if (!inThrottle) {
128
+ fn.apply(this, args);
129
+ inThrottle = true;
130
+ setTimeout(() => inThrottle = false, limit);
131
+ }
132
+ };
133
+ }
134
+
135
+ // Batch DOM updates using requestAnimationFrame
136
+ function batchUpdate(updateFn) {
137
+ return new Promise(resolve => {
138
+ requestAnimationFrame(() => {
139
+ updateFn();
140
+ resolve();
141
+ });
142
+ });
143
+ }
144
 
145
  // ============================================================================
146
+ // SCALING ANALYSIS - 3 Charts with animated dimension switching
147
  // ============================================================================
148
 
149
  // Helper to normalize values to [0, 1]
 
210
  'globem': [0, 50] // Python: y_min=0, y_max=50
211
  };
212
 
213
+ // Populate shared legend for a section
214
+ function populateSharedLegend(containerId, models, colorMap) {
215
+ const container = document.getElementById(containerId);
216
+ if (!container) return;
217
+
218
+ container.innerHTML = models.map(model => {
219
+ const color = (colorMap && colorMap[model]) || '#888';
220
+ return `<div class="legend-item">
221
+ <span class="legend-color" style="background: ${color}"></span>
222
+ <span>${model}</span>
223
+ </div>`;
224
+ }).join('');
225
+ }
226
+
227
  function initScalingCharts() {
228
+ // Check if data is loaded
229
+ if (typeof DDR_DATA === 'undefined' || !DDR_DATA.scaling) {
230
+ console.warn('DDR_DATA not loaded yet, retrying...');
231
+ setTimeout(initScalingCharts, 100);
232
+ return;
233
+ }
234
+
235
  const scenarios = ['mimic', '10k', 'globem'];
236
 
237
  scenarios.forEach(scenario => {
 
257
  x: modelNormX,
258
  y: data[model].accuracy,
259
  mode: 'lines+markers',
260
+ name: model, // CRITICAL: Set model name for legend
261
  line: { color: DDR_DATA.modelColors[model] || '#888', width: 2 },
262
  marker: { size: 6, color: DDR_DATA.modelColors[model] || '#888' },
263
  hovertemplate: `<b>${model}</b><br>Turn: %{customdata}<br>Accuracy: %{y:.2f}%<extra></extra>`,
 
271
  ...darkLayout,
272
  xaxis: {
273
  ...darkLayout.xaxis,
274
+ title: { text: 'Number of Interaction Turns', font: { size: 11, color: '#1d1d1f' } },
275
  type: 'linear', // ALWAYS LINEAR
276
  range: [-0.05, 1.05], // FIXED RANGE
277
  tickmode: 'array',
 
281
  },
282
  yaxis: {
283
  ...darkLayout.yaxis,
284
+ title: { text: 'Accuracy (%)', font: { size: 11, color: '#1d1d1f' } },
285
  dtick: 5,
286
  range: yRange
287
  },
288
+ showlegend: false // Use shared legend instead
289
  };
290
 
291
  Plotly.newPlot(`scaling-${scenario}`, traces, layout, plotlyConfig);
292
  });
 
293
 
294
+ // Populate shared legend with models from first scenario
295
+ const firstScenario = scenarios.find(s => DDR_DATA.scaling[s]);
296
+ if (firstScenario) {
297
+ const models = Object.keys(DDR_DATA.scaling[firstScenario]);
298
+ populateSharedLegend('scaling-legend', models, DDR_DATA.modelColors);
 
299
  }
300
+
301
+ // Apply hover effects after charts are rendered
302
+ setTimeout(() => applyHoverEffectsForSection('scaling'), 100);
303
+ }
304
 
305
  function updateScalingCharts(dimension) {
306
  const scenarios = ['mimic', '10k', 'globem'];
 
336
  let offset = 0;
337
 
338
  const hoverLabels = { 'turn': 'Turns', 'token': 'Tokens', 'cost': 'Cost' };
 
339
 
340
  models.forEach((model, i) => {
341
  const len = data[model].turns.length;
 
354
  x: modelNormX,
355
  y: data[model].accuracy,
356
  customdata: rawValues,
357
+ name: model, // CRITICAL: Preserve model name
358
+ mode: 'lines+markers',
359
  hovertemplate: `<b>${model}</b><br>${hoverLabels[dimension]}: %{customdata}<br>Accuracy: %{y:.2f}%<extra></extra>`
360
  });
361
  });
362
 
363
+ // Two-Phase Animation: Points Only -> Add Lines with Drawing Effect
 
364
  const graphDiv = document.getElementById(`scaling-${scenario}`);
365
 
366
  // Phase 1: Update to markers-only mode and animate points
 
390
  redraw: true
391
  }
392
  }).then(() => {
393
+ // Phase 2: Add lines back with drawing animation
394
+ // CRITICAL: Pre-hide lines BEFORE react renders them
395
  const linesAndMarkersTraces = newTraces.map(trace => ({
396
  ...trace,
397
+ mode: 'lines+markers',
398
+ line: {
399
+ ...trace.line,
400
+ // Start with invisible line (will be animated in)
401
+ width: 0
402
+ }
403
  }));
404
 
405
+ // First, add the lines with width 0 (invisible)
406
  Plotly.react(`scaling-${scenario}`, linesAndMarkersTraces, {
407
  ...graphDiv.layout
408
  }, plotlyConfig).then(() => {
409
+ // Now set line width back and prepare for stroke animation
410
+ const visibleTraces = newTraces.map(trace => ({
411
+ ...trace,
412
+ mode: 'lines+markers'
413
+ }));
414
+
415
+ // Immediately query paths and set them to hidden state BEFORE making visible
416
+ const paths = graphDiv.querySelectorAll('.scatterlayer .trace .lines path');
417
+
418
+ // Pre-set all paths to invisible using stroke-dashoffset
419
+ paths.forEach((path) => {
420
+ const len = path.getTotalLength();
421
+ if (len > 0) {
422
+ path.style.transition = 'none';
423
+ path.style.strokeDasharray = len + ' ' + len;
424
+ path.style.strokeDashoffset = len;
425
+ }
426
+ });
427
+
428
+ // Now make lines visible (they're hidden by dashoffset)
429
+ Plotly.restyle(`scaling-${scenario}`, {
430
+ 'line.width': models.map(() => 2)
431
+ }).then(() => {
432
+ // Force reflow
433
+ graphDiv.getBoundingClientRect();
434
+
435
+ // Start the stroke animation after a short delay
436
  requestAnimationFrame(() => {
437
+ paths.forEach((path) => {
 
 
 
 
 
 
 
 
 
 
 
 
438
  const len = path.getTotalLength();
 
439
  if (len > 0) {
440
+ path.style.transition = 'stroke-dashoffset 0.8s ease-out';
441
+ path.style.strokeDashoffset = '0';
 
 
 
 
 
 
 
 
 
 
 
442
  }
443
  });
444
  });
 
448
  });
449
  }
450
 
451
// Dimension toggle for the SCALING section only.
// Clicking a button marks it active, records the chosen dimension in
// `currentScalingDim` and redraws the scaling charts for that dimension.
document.addEventListener('DOMContentLoaded', () => {
  const scalingButtons = document.querySelectorAll('#scaling .dim-btn');

  scalingButtons.forEach((btn) => {
    btn.addEventListener('click', () => {
      // Re-clicking the button that is already active would only restart the
      // same redraw, so ignore it (the ranking toggle guards the same way).
      if (btn.classList.contains('active')) return;

      // Only the scaling buttons are touched; other sections keep their state
      scalingButtons.forEach((other) => other.classList.remove('active'));
      btn.classList.add('active');

      const dimension = btn.dataset.dim;
      currentScalingDim = dimension;
      updateScalingCharts(dimension);
    });
  });
});
466
 
 
500
  return RANKING_DISPLAY_NAMES[model] || model;
501
  }
502
 
 
 
503
  function renderRankingCharts(mode, animate = false) {
504
  const scenarios = [
505
  { key: 'MIMIC', id: 'mimic' },
 
511
  const rawData = DDR_DATA.ranking[key];
512
  if (!rawData) return;
513
 
514
+ // 1. Establish Base Order (Always sorted by Novelty/BT Rank initially)
515
+ // This ensures traces maintain object identity for animation
516
+ const baseModels = [...rawData].sort((a, b) => a.bt_rank - b.bt_rank);
517
+ const topN = baseModels.length;
518
+
519
+ // 2. Calculate Target Y-Positions based on current mode
520
+ // We need to know where each model *should* be
521
+ let sortedIndices;
522
  if (mode === 'novelty') {
523
+ // In novelty mode, order matches baseModels (0, 1, 2...)
524
+ sortedIndices = baseModels.map((_, i) => i);
525
  } else {
526
+ // In accuracy mode, we need to find the rank index of each baseModel
527
+ // Sort a copy to find the target order
528
+ const accSorted = [...baseModels].map((m, i) => ({ model: m.model, acc_rank: m.acc_rank, originalIdx: i }))
529
+ .sort((a, b) => a.acc_rank - b.acc_rank);
530
+
531
+ // Map: originalIdx -> targetY
532
+ const indexMap = new Array(topN);
533
+ accSorted.forEach((item, targetY) => {
534
+ indexMap[item.originalIdx] = targetY;
535
+ });
536
+ sortedIndices = indexMap;
537
  }
538
 
539
+ // 3. Prepare Data Arrays using Base Order
540
+ // Invert Y-values so Rank 1 (Best) is at the TOP
541
+ const yValues = sortedIndices.map(idx => topN - 1 - idx);
542
+ const xBt = baseModels.map(m => m.bt_rank);
543
+ const xAcc = baseModels.map(m => m.acc_rank);
544
+ const names = baseModels.map(m => getDisplayName(m.model));
545
+ const colors = baseModels.map(m => m.is_proprietary ? PROPRIETARY_COLOR : OPENSOURCE_COLOR);
546
+
547
  const traces = [];
548
 
549
+ // Trace 0: Connection Lines (Consolidated)
550
+ const lineX = [];
551
+ const lineY = [];
552
+ baseModels.forEach((_, i) => {
553
+ lineX.push(xBt[i], xAcc[i], null);
554
+ lineY.push(yValues[i], yValues[i], null);
 
 
 
 
 
 
 
 
555
  });
556
 
 
557
  traces.push({
558
+ x: lineX,
559
+ y: lineY,
560
+ mode: 'lines',
561
+ line: {
562
+ color: 'rgba(148, 163, 184, 0.4)',
563
+ width: 1.5,
564
+ dash: 'dash'
565
+ },
566
+ showlegend: false,
567
+ hoverinfo: 'skip'
568
+ });
569
+
570
+ // Trace 1: Novelty Rank Points
571
+ traces.push({
572
+ x: xBt,
573
+ y: yValues,
574
  mode: 'markers',
575
  name: 'Novelty Rank',
576
  marker: {
577
  size: mode === 'novelty' ? 12 : 10,
578
  symbol: 'circle',
579
+ color: colors,
580
  line: { color: '#fff', width: 1.5 }
581
  },
582
+ text: baseModels.map(m => `<b>${getDisplayName(m.model)}</b><br>Novelty: #${m.bt_rank}<br>Win Rate: ${m.win_rate}%`),
583
  hovertemplate: '%{text}<extra></extra>'
584
  });
585
 
586
+ // Trace 2: Accuracy Rank Points
587
  traces.push({
588
+ x: xAcc,
589
+ y: yValues,
590
  mode: 'markers',
591
  name: 'Accuracy Rank',
592
  marker: {
593
  size: mode === 'accuracy' ? 12 : 10,
594
  symbol: 'diamond-open',
595
+ color: colors,
596
  line: { width: 2 }
597
  },
598
+ text: baseModels.map(m => `<b>${getDisplayName(m.model)}</b><br>Accuracy: #${m.acc_rank}<br>${m.accuracy}%`),
599
  hovertemplate: '%{text}<extra></extra>'
600
  });
601
 
602
+ // Trace 3: Animated Y-Axis Labels (Model Names)
603
+ // Place them to the left of the max rank.
604
+ // X-axis is inverted (Max -> 1), so we place labels at Max + padding
605
+ // We want labels on the LEFT side.
606
+ // If range is [topN + 8, 0.5], then topN + 8 is on the LEFT.
607
+ // So we place labels at topN + 1.
608
+ const labelX = new Array(topN).fill(topN + 1);
609
+
610
+ traces.push({
611
+ x: labelX,
612
+ y: yValues,
613
+ mode: 'text',
614
+ text: names,
615
+ textposition: 'middle left',
616
+ textfont: { size: 10, color: '#515154', family: '-apple-system, BlinkMacSystemFont, "SF Pro Text", sans-serif' },
617
+ hoverinfo: 'skip',
618
+ showlegend: false
619
+ });
620
+
621
+ // Calculate correlation (same as before)
622
+ const btRanks = baseModels.map(m => m.bt_rank);
623
+ const accRanks = baseModels.map(m => m.acc_rank);
624
  const n = btRanks.length;
625
  const meanBt = btRanks.reduce((a, b) => a + b, 0) / n;
626
  const meanAcc = accRanks.reduce((a, b) => a + b, 0) / n;
 
638
  ...darkLayout,
639
  xaxis: {
640
  ...darkLayout.xaxis,
641
+ title: { text: 'Rank', font: { size: 10, color: '#1d1d1f' } },
642
+ range: [topN + 8, 0.5], // Revert padding
643
+ tickmode: 'array', // Explicitly set ticks
644
+ tickvals: Array.from({ length: topN }, (_, i) => i + 1), // Only show ticks 1 to N
645
+ zeroline: false
646
  },
647
  yaxis: {
648
  ...darkLayout.yaxis,
649
+ showticklabels: false, // Hide native ticks
650
+ automargin: false, // We handle margin manually
651
+ range: [-1, topN + 2], // Add vertical padding
652
+ zeroline: false
 
 
653
  },
654
  showlegend: false,
655
  annotations: [
 
660
  yref: 'paper',
661
  text: `ρ = ${rho.toFixed(2)}`,
662
  showarrow: false,
663
+ font: { size: 11, color: '#515154', family: '-apple-system, BlinkMacSystemFont, "SF Pro Text", sans-serif' },
664
+ bgcolor: 'rgba(255, 255, 255, 0.9)',
665
  borderpad: 4
666
  },
667
  {
 
671
  yref: 'paper',
672
  text: sortLabel,
673
  showarrow: false,
674
+ font: { size: 10, color: mode === 'novelty' ? PROPRIETARY_COLOR : OPENSOURCE_COLOR, family: '-apple-system, BlinkMacSystemFont, "SF Pro Text", sans-serif' },
675
+ bgcolor: 'rgba(255, 255, 255, 0.9)',
676
  borderpad: 4
677
  }
678
  ],
679
+ // Adjust margins: Left needs to be smaller since labels are now inside the plot area (but visually left)
680
+ // Actually, since we extended X-range, we can keep normal margins or reduce left
681
+ margin: { t: 15, r: 15, b: 40, l: 20 }
682
  };
683
 
684
  if (animate) {
685
+ Plotly.animate(`ranking-${id}`, {
686
+ data: traces,
687
+ layout: layout
688
+ }, animationSettings);
689
  } else {
690
  Plotly.newPlot(`ranking-${id}`, traces, layout, plotlyConfig);
691
  }
 
693
  }
694
 
695
/**
 * Draws the ranking charts in 'novelty' mode once the ranking data exists.
 *
 * While `DDR_DATA.ranking` is not available the function re-schedules itself
 * every 100 ms. The polling is bounded so that a data script that never loads
 * does not keep a timer running for the lifetime of the page.
 *
 * @param {number} [attempt=0] - Number of polls already made (used internally
 *   by the retry timer; callers normally omit it).
 */
function initRankingCharts(attempt = 0) {
  const MAX_ATTEMPTS = 100; // 100 polls x 100 ms = 10 s
  // Tolerate being invoked with a non-numeric first argument
  const tries = Number.isInteger(attempt) ? attempt : 0;

  if (typeof DDR_DATA === 'undefined' || !DDR_DATA.ranking) {
    if (tries >= MAX_ATTEMPTS) {
      console.error('DDR_DATA.ranking is not available; ranking charts were not rendered.');
      return;
    }
    setTimeout(() => initRankingCharts(tries + 1), 100);
    return;
  }

  renderRankingCharts('novelty', false);
}
703
 
704
// Ranking mode toggle: once the DOM is parsed, wire the buttons inside
// #ranking so that choosing a different mode re-renders the ranking charts
// with animation.
document.addEventListener('DOMContentLoaded', () => {
  const rankingButtons = document.querySelectorAll('#ranking .dim-btn');

  const selectMode = (clicked) => {
    const mode = clicked.dataset.mode;

    // Nothing to do when the requested mode is already displayed
    if (mode === currentRankingMode) {
      return;
    }

    // Move the 'active' class; only ranking buttons are affected
    rankingButtons.forEach((candidate) => {
      candidate.classList.remove('active');
    });
    clicked.classList.add('active');

    currentRankingMode = mode;
    renderRankingCharts(mode, true);
  };

  rankingButtons.forEach((btn) => {
    btn.addEventListener('click', () => selectMode(btn));
  });
});
721
 
722
  // ============================================================================
723
  // TURN DISTRIBUTION - 3 Charts (Ridgeline style)
724
  // ============================================================================
 
725
  const TURN_DISPLAY_NAMES = {
726
  'run_api_deepseek_deepseek-chat': 'DeepSeek-V3.2',
727
  'qwen3-next-80b-a3b-instruct': 'Qwen3-Next-80BA3B',
 
758
  }
759
 
760
  function initTurnCharts() {
761
+ // Check if data is loaded
762
+ if (typeof DDR_DATA === 'undefined' || !DDR_DATA.turn) {
763
+ setTimeout(initTurnCharts, 100);
764
+ return;
765
+ }
766
+
767
  const scenarios = ['mimic', '10k', 'globem'];
768
 
769
+ // Family colors matching the Python script
770
  const familyColors = {
771
+ 'claude': '#D97706',
772
+ 'gpt': '#10A37F',
773
+ 'gemini': '#4285F4',
774
+ 'deepseek': '#1E3A8A',
775
+ 'glm': '#7C3AED',
776
+ 'kimi': '#DC2626',
777
+ 'minimax': '#EC4899',
778
  'qwen': '#0EA5E9',
779
  'llama': '#F59E0B'
780
  };
 
784
  for (const [family, color] of Object.entries(familyColors)) {
785
  if (lower.includes(family)) return color;
786
  }
787
+ return '#666666';
788
  }
789
 
790
  scenarios.forEach(scenario => {
791
  const data = DDR_DATA.turn[scenario];
792
  if (!data) return;
793
 
794
+ // Sort by median descending to get top 15
795
  const sortedData = [...data].sort((a, b) => b.median - a.median);
796
 
797
+ // Limit to top 15 models, then reverse so highest median is at top of chart
798
+ const displayData = sortedData.slice(0, 15).reverse();
799
 
800
  const traces = [];
 
801
  const binCenters = [5, 15, 25, 35, 45, 55, 65, 75, 85, 95];
802
 
 
803
  displayData.forEach((model, idx) => {
804
  const color = getModelColor(model.model);
805
  const yOffset = idx;
806
  const displayName = getTurnDisplayName(model.model);
 
 
807
  const maxDist = Math.max(...model.distribution) || 1;
 
808
 
809
+ // Original bin centers and values
810
+ const binCenters = [5, 15, 25, 35, 45, 55, 65, 75, 85, 95];
811
+ const binValues = model.distribution.map(d => d / maxDist * 0.75);
812
+
813
+ // Interpolate more points for smoother curve (similar to KDE)
814
+ const xSmooth = [];
815
+ const ySmooth = [];
816
+
817
+ // Add start point at baseline
818
+ xSmooth.push(0);
819
+ ySmooth.push(yOffset);
820
+
821
+ // Interpolate between bin centers for smoothness
822
+ for (let i = 0; i < binCenters.length; i++) {
823
+ xSmooth.push(binCenters[i]);
824
+ ySmooth.push(yOffset + binValues[i]);
825
+ }
826
+
827
+ // Add end point at baseline
828
+ xSmooth.push(100);
829
+ ySmooth.push(yOffset);
830
+
831
+ // Create the curve trace with spline smoothing
832
  traces.push({
833
+ x: xSmooth,
834
+ y: ySmooth,
835
  mode: 'lines',
836
+ line: {
837
+ color: color,
838
+ width: 2,
839
+ shape: 'spline', // Smooth spline interpolation
840
+ smoothing: 1.3 // Smoothing factor
841
+ },
842
  fill: 'toself',
843
+ fillcolor: color + '60',
 
844
  name: displayName,
845
+ hovertemplate: `<b>${displayName}</b><br>Median: ${model.median}<extra></extra>`,
 
 
 
 
 
 
 
 
 
 
 
 
 
846
  showlegend: false
847
  });
848
  });
 
851
  ...darkLayout,
852
  xaxis: {
853
  ...darkLayout.xaxis,
854
+ title: { text: 'Number of Turns', font: { size: 12, color: '#1d1d1f' } },
855
  range: [0, 100],
856
  dtick: 20
857
  },
858
  yaxis: {
859
  ...darkLayout.yaxis,
860
  tickmode: 'array',
861
+ tickvals: displayData.map((_, i) => i + 0.35),
862
  ticktext: displayData.map(m => getTurnDisplayName(m.model)),
863
  automargin: true,
864
+ range: [-0.5, displayData.length],
865
+ showgrid: false,
866
+ zeroline: false
867
  },
868
  margin: { ...darkLayout.margin, l: 140 },
869
  showlegend: false
 
877
  // PROBING RESULTS - 3 Charts with animated mode switching
878
  // ============================================================================
879
/**
 * Draws the probing charts in 'byProgress' mode once the probing data exists.
 *
 * While `DDR_DATA.probing` is not available the function re-schedules itself
 * every 100 ms. The polling is bounded so that a data script that never loads
 * does not keep a timer running for the lifetime of the page.
 *
 * @param {number} [attempt=0] - Number of polls already made (used internally
 *   by the retry timer; callers normally omit it).
 */
function initProbingCharts(attempt = 0) {
  const MAX_ATTEMPTS = 100; // 100 polls x 100 ms = 10 s
  // Tolerate being invoked with a non-numeric first argument
  const tries = Number.isInteger(attempt) ? attempt : 0;

  if (typeof DDR_DATA === 'undefined' || !DDR_DATA.probing) {
    if (tries >= MAX_ATTEMPTS) {
      console.error('DDR_DATA.probing is not available; probing charts were not rendered.');
      return;
    }
    setTimeout(() => initProbingCharts(tries + 1), 100);
    return;
  }

  renderProbingCharts('byProgress');
}
887
 
888
  function renderProbingCharts(mode) {
 
890
  const scenarioIds = { 'mimic': 'mimic', 'globem': 'globem', '10k': '10k' };
891
 
892
  scenarios.forEach(scenario => {
893
+ const modeKey = mode === 'byTurn' ? 'byTurn' : 'byProgress';
894
+ const data = DDR_DATA.probing[modeKey]?.[scenario];
895
  if (!data) return;
896
 
897
  const traces = [];
898
+ const allModels = Object.keys(data);
899
+ // Filter out 7B and 14B models
900
+ const models = allModels.filter(m => !m.includes('7B') && !m.includes('14B'));
901
 
902
  models.forEach(model => {
903
  const modelData = data[model];
904
  const xKey = mode === 'byTurn' ? 'turns' : 'progress';
905
  const xLabel = mode === 'byTurn' ? 'Turn' : 'Progress (%)';
906
 
907
+ // Main line - CONSISTENT STYLE
908
  traces.push({
909
  x: modelData[xKey],
910
  y: modelData.logprob,
911
+ mode: 'lines+markers', // Show both lines and data points
912
  name: model,
913
  line: {
914
+ color: (DDR_DATA.modelColors && DDR_DATA.modelColors[model]) || '#888',
915
  width: 2
916
  },
917
+ marker: { size: 6, color: (DDR_DATA.modelColors && DDR_DATA.modelColors[model]) || '#888' },
 
 
 
918
  hovertemplate: `<b>${model}</b><br>${xLabel}: %{x}<br>Log Prob: %{y:.2f}<extra></extra>`
919
  });
920
 
 
927
  x: [...modelData[xKey], ...modelData[xKey].slice().reverse()],
928
  y: [...upper, ...lower.slice().reverse()],
929
  fill: 'toself',
930
+ fillcolor: ((DDR_DATA.modelColors && DDR_DATA.modelColors[model]) || '#888') + '25',
931
  line: { width: 0 },
932
  showlegend: false,
933
  hoverinfo: 'skip'
 
935
  }
936
  });
937
 
938
+ // Set different x-axis ranges based on mode
939
+ const xaxisConfig = mode === 'byTurn' ? {
940
+ title: { text: 'Turn', font: { size: 11, color: '#1d1d1f' } },
941
+ range: [0.5, 10.5], // Turns from 1-10
942
+ dtick: 1
943
+ } : {
944
+ title: { text: 'Interaction Progress (%)', font: { size: 11, color: '#1d1d1f' } },
945
+ range: [0, 100], // Progress from 0-100%
946
+ dtick: 10
947
+ };
948
+
949
  const layout = {
950
  ...darkLayout,
951
  xaxis: {
952
  ...darkLayout.xaxis,
953
+ ...xaxisConfig
954
  },
955
  yaxis: {
956
  ...darkLayout.yaxis,
957
+ title: { text: 'Avg Log Probability', font: { size: 11, color: '#1d1d1f' } }
958
  },
959
+ showlegend: false // Use shared legend instead
960
  };
961
 
962
+ const chartId = `probing-${scenarioIds[scenario]}`;
963
+
964
+ // Check if chart exists
965
+ const chartDiv = document.getElementById(chartId);
966
+ if (chartDiv && chartDiv.data) {
967
+ // Use animate for smooth transition with layout update
968
+ Plotly.animate(chartId, {
969
+ data: traces,
970
+ layout: layout
971
+ }, animationSettings);
972
+ } else {
973
+ // Initial plot
974
+ Plotly.newPlot(chartId, traces, layout, plotlyConfig);
975
+ }
976
  });
 
977
 
978
+ // Populate shared legend with filtered models from first available scenario
979
+ const firstScenario = scenarios.find(s => DDR_DATA.probing[mode === 'byTurn' ? 'byTurn' : 'byProgress']?.[s]);
980
+ if (firstScenario) {
981
+ const allModels = Object.keys(DDR_DATA.probing[mode === 'byTurn' ? 'byTurn' : 'byProgress'][firstScenario]);
982
+ const filteredModels = allModels.filter(m => !m.includes('7B') && !m.includes('14B'));
983
+ populateSharedLegend('probing-legend', filteredModels, DDR_DATA.modelColors);
984
+ }
985
 
986
+ // Apply hover effects after charts are rendered
987
+ setTimeout(() => applyHoverEffectsForSection('probing'), 100);
988
+ }
989
 
 
 
 
 
990
 
 
 
 
 
 
 
 
 
991
 
992
  // ============================================================================
993
  // ERROR ANALYSIS - Hierarchical Bar Chart
994
  // ============================================================================
995
  function initErrorChart() {
996
+ // Check if data is loaded
997
+ if (typeof DDR_DATA === 'undefined') {
998
+ setTimeout(initErrorChart, 100);
999
+ return;
1000
+ }
1001
+
1002
  const data = DDR_DATA.error;
1003
  if (!data || data.length === 0) return;
1004
 
 
1022
  },
1023
  text: data.map(d => `${d.percentage}%`),
1024
  textposition: 'outside',
1025
+ textfont: { size: 11, color: '#1d1d1f' },
1026
  hovertemplate: '<b>%{x}</b><br>%{y:.1f}%<br>Count: %{customdata}<extra></extra>',
1027
  customdata: data.map(d => d.count),
1028
  showlegend: false
 
1039
  y: maxPct * 1.15,
1040
  text: `<b>${catName}</b>`,
1041
  showarrow: false,
1042
+ font: { size: 10, color: '#1d1d1f' },
1043
  xanchor: 'center',
1044
  yanchor: 'bottom'
1045
  });
 
1050
  xaxis: {
1051
  ...darkLayout.xaxis,
1052
  tickangle: -30,
1053
+ tickfont: { size: 10, color: '#515154' }
1054
  },
1055
  yaxis: {
1056
  ...darkLayout.yaxis,
1057
+ title: { text: 'Percentage (%)', font: { size: 11, color: '#1d1d1f' } },
1058
  range: [0, maxPct * 1.25]
1059
  },
1060
  annotations: annotations,
 
1065
  }
1066
 
1067
  // ============================================================================
1068
+ // ENTROPY ANALYSIS - Scatter plots by model (Entropy vs Coverage, Opacity = Accuracy)
1069
+ // ============================================================================
1070
// Models shown in the entropy section, in subplot order: the model at index i
// is rendered into the element with id `entropy-model-${i}`.
const ENTROPY_MODELS = [
  'GPT-5.2',
  'Claude-4.5-Sonnet',
  'Gemini-3-Flash',
  'GLM-4.6',
  'Qwen3-Next-80B-A3B',
  'DeepSeek-V3.2',
];

// Scenario key currently selected by the entropy toggle buttons
let currentEntropyScenario = '10k';
1080
+
1081
/**
 * Wires the entropy scenario toggle buttons and performs the first render.
 *
 * While `ENTROPY_DATA` is not defined the function re-schedules itself every
 * 100 ms. The polling is bounded so that a data script that never loads does
 * not keep a timer running for the lifetime of the page.
 *
 * @param {number} [attempt=0] - Number of polls already made (used internally
 *   by the retry timer; callers normally omit it).
 */
function initEntropyCharts(attempt = 0) {
  const MAX_ATTEMPTS = 100; // 100 polls x 100 ms = 10 s
  // Tolerate being invoked with a non-numeric first argument
  const tries = Number.isInteger(attempt) ? attempt : 0;

  if (typeof ENTROPY_DATA === 'undefined') {
    if (tries >= MAX_ATTEMPTS) {
      console.error('ENTROPY_DATA is not available; entropy charts were not rendered.');
      return;
    }
    setTimeout(() => initEntropyCharts(tries + 1), 100);
    return;
  }

  // Toggle buttons: move the 'active' class and re-render for the new scenario
  const scenarioButtons = document.querySelectorAll('[data-entropy-scenario]');
  scenarioButtons.forEach((btn) => {
    btn.addEventListener('click', () => {
      scenarioButtons.forEach((other) => other.classList.remove('active'));
      btn.classList.add('active');
      currentEntropyScenario = btn.dataset.entropyScenario;
      renderEntropyCharts(currentEntropyScenario);
    });
  });

  // Initial render follows the tracked selection instead of a second
  // hard-coded copy of the default scenario key
  renderEntropyCharts(currentEntropyScenario);
}
1101
+
1102
/**
 * Renders one entropy-vs-coverage scatter plot per entry of ENTROPY_MODELS for
 * the given scenario. The model at index i is drawn into the element with id
 * `entropy-model-${i}` and its heading `entropy-model-${i}-title` shows the
 * number of points. Marker opacity encodes accuracy, scaled linearly from
 * 0.15 (at the dataset's acc_min) to 1 (at acc_max).
 *
 * Logs an error and returns when the scenario has no dataset. Subplots whose
 * container element is missing are skipped so the remaining ones still render.
 *
 * @param {string} scenario - Key into `ENTROPY_DATA.datasets`.
 */
function renderEntropyCharts(scenario) {
  const entropyData = ENTROPY_DATA;
  const datasetInfo = entropyData.datasets?.[scenario];

  if (!datasetInfo) {
    console.error(`No entropy data for scenario: ${scenario}`);
    return;
  }

  const points = datasetInfo.points || [];
  const yMax = datasetInfo.y_max || 1;
  const accMin = datasetInfo.acc_min || 0;
  const accMax = datasetInfo.acc_max || 100;
  const hasAccRange = accMax > accMin;
  const colors = entropyData.modelColors || {};

  const axisTitle = (text) => ({ text, font: { size: 10, color: '#1d1d1f' } });

  // Opacity for one point; clamped so out-of-range accuracies stay within
  // [0.15, 1], with 0.7 as the fallback when accuracy cannot be scaled
  const alphaFor = (accuracy) => {
    if (!hasAccRange || !Number.isFinite(accuracy)) return 0.7;
    const t = (accuracy - accMin) / (accMax - accMin);
    const clamped = Math.min(1, Math.max(0, t));
    return Math.min(1, 0.15 + clamped * 0.85);
  };

  const accuracyLabel = (accuracy) =>
    (Number.isFinite(accuracy) ? `${accuracy.toFixed(1)}%` : 'n/a');

  // Group points by model name (Map avoids clashes with Object.prototype keys)
  const modelGroups = new Map();
  points.forEach((p) => {
    if (!modelGroups.has(p.model)) {
      modelGroups.set(p.model, []);
    }
    modelGroups.get(p.model).push(p);
  });

  ENTROPY_MODELS.forEach((model, idx) => {
    const chartId = `entropy-model-${idx}`;
    const titleId = `entropy-model-${idx}-title`;
    const color = colors[model] || '#888888';
    const pts = modelGroups.get(model) || [];

    // Heading shows the sample count for the selected scenario
    const titleEl = document.getElementById(titleId);
    if (titleEl) {
      titleEl.textContent = `${model} (n=${pts.length})`;
    }

    // Without a container there is nothing to plot into; skip this subplot
    // rather than letting Plotly throw and abort the remaining ones
    const chartDiv = document.getElementById(chartId);
    if (!chartDiv) return;

    // Cancel a delayed update left over from a previous call so it cannot
    // overwrite what is rendered now
    clearTimeout(chartDiv.__entropyRenderTimer);
    chartDiv.__entropyRenderTimer = undefined;

    if (pts.length === 0) {
      // Empty chart with a centered "No data" message
      const emptyLayout = {
        ...darkLayout,
        xaxis: { ...darkLayout.xaxis, range: [0.6, 1.05], title: axisTitle('Entropy') },
        yaxis: { ...darkLayout.yaxis, range: [-0.05, yMax], title: axisTitle('Coverage') },
        annotations: [{
          text: 'No data',
          xref: 'paper',
          yref: 'paper',
          x: 0.5,
          y: 0.5,
          showarrow: false,
          font: { size: 14, color: '#888' },
        }],
      };
      // A cancelled fade may have left the container dimmed
      chartDiv.style.opacity = '1';
      Plotly.newPlot(chartId, [], emptyLayout, plotlyConfig);
      return;
    }

    const trace = {
      x: pts.map((p) => p.entropy),
      y: pts.map((p) => p.coverage),
      mode: 'markers',
      type: 'scatter',
      marker: {
        color: color,
        size: 7,
        opacity: pts.map((p) => alphaFor(p.accuracy)),
        line: { color: '#333', width: 0.5 },
      },
      name: model,
      text: pts.map((p) => `Entropy: ${p.entropy.toFixed(3)}<br>Coverage: ${(p.coverage * 100).toFixed(1)}%<br>Accuracy: ${accuracyLabel(p.accuracy)}`),
      hovertemplate: '<b>' + model + '</b><br>%{text}<extra></extra>',
      showlegend: false,
    };

    const layout = {
      ...darkLayout,
      xaxis: {
        ...darkLayout.xaxis,
        title: axisTitle('Entropy'),
        range: [0.6, 1.05],
        dtick: 0.1,
      },
      yaxis: {
        ...darkLayout.yaxis,
        title: axisTitle('Coverage'),
        range: [-0.05, yMax],
      },
      margin: { t: 20, r: 20, b: 50, l: 50 },
    };

    // Dim the container, then swap the data and fade back in
    chartDiv.style.transition = 'opacity 0.3s ease';
    chartDiv.style.opacity = '0.3';

    chartDiv.__entropyRenderTimer = setTimeout(() => {
      chartDiv.__entropyRenderTimer = undefined;

      // react updates the existing plot (or creates it on first use)
      Plotly.react(chartId, [trace], layout, plotlyConfig);
      chartDiv.style.opacity = '1';

      // Hover effects are (re-)attached after the chart update
      addHoverHighlight(chartId);
    }, 150);
  });
}
1222
+
1223
+ // ============================================================================
1224
+ // INITIALIZE ALL CHARTS - Using Lazy Loading for Performance
1225
  // ============================================================================
1226
// Bootstraps chart rendering once the DOM has been parsed
document.addEventListener('DOMContentLoaded', () => {
  // Hand every section to the lazy-load observer
  document
    .querySelectorAll('section.section')
    .forEach((section) => {
      lazyLoadObserver.observe(section);
    });

  // The scaling section is initialised right away for instant feedback;
  // the remaining sections are left to the observer.
  const scalingSection = document.getElementById('scaling');
  if (!scalingSection) {
    return;
  }

  initializedCharts.add('scaling');
  // Zero-delay timer so the chart setup runs after this handler returns
  setTimeout(() => initScalingCharts(), 0);
});
1241
 
1242
// Handle window resize: debounced so charts are resized once, 250 ms after
// the last resize event.
let resizeTimeout;

/**
 * Resizes every Plotly chart that belongs to an initialised section.
 * Sections are looked up in `initializedCharts`; elements that do not exist
 * or have not been plotted yet (no `data`) are skipped.
 *
 * The listener below already debounces calls, so no additional throttle
 * wrapper is applied here.
 */
const resizeHandler = () => {
  const scenarioIds = ['mimic', '10k', 'globem'];
  const perScenario = (prefix) => scenarioIds.map((s) => `${prefix}-${s}`);

  // Chart element ids grouped by section key
  const chartIdsBySection = {
    scaling: perScenario('scaling'),
    ranking: perScenario('ranking'),
    turn: perScenario('turn'),
    probing: perScenario('probing'),
    // One subplot per entropy model instead of a hard-coded count
    entropy: ENTROPY_MODELS.map((_, i) => `entropy-model-${i}`),
    error: ['error-chart'],
  };

  for (const [section, chartIds] of Object.entries(chartIdsBySection)) {
    // Only resize charts that have been initialised
    if (!initializedCharts.has(section)) continue;

    for (const chartId of chartIds) {
      const el = document.getElementById(chartId);
      if (el && el.data) Plotly.Plots.resize(el);
    }
  }
};

window.addEventListener('resize', () => {
  clearTimeout(resizeTimeout);
  resizeTimeout = setTimeout(resizeHandler, 250);
});
1286
+
1287
+ // ============================================================================
1288
+ // HOVER HIGHLIGHT EFFECTS - Optimized with batched updates
1289
+ // ============================================================================
1290
/**
 * Attaches hover-highlight behaviour to a Plotly chart.
 *
 * On `plotly_hover` the hovered trace gets opacity 1, line width 4 and an
 * enlarged marker (12) at the hovered point; every other trace is dimmed to
 * opacity 0.4. On `plotly_unhover` all traces return to opacity 1, line
 * width 2 and marker size 6. Traces with `fill: 'toself'` (error bands) are
 * never restyled.
 *
 * The function may be called repeatedly for the same chart: handlers
 * installed by an earlier call are detached first, so they do not pile up
 * when a chart is updated and the effect is re-applied.
 *
 * @param {string} chartId - Id of the chart's container element. Does nothing
 *   when the element is missing or is not a Plotly graph (no `on` method).
 */
function addHoverHighlight(chartId) {
  const chart = document.getElementById(chartId);
  if (!chart || !chart.on) return;

  // Detach handlers from a previous call. If the plot was re-created in the
  // meantime they are already gone and removing them is a no-op.
  const previous = chart.__hoverHighlightHandlers;
  if (previous && typeof chart.removeListener === 'function') {
    chart.removeListener('plotly_hover', previous.hover);
    chart.removeListener('plotly_unhover', previous.unhover);
  }

  let lastHoveredTrace = null;
  let lastHoveredPoint = null;
  let isAnimating = false;

  // Builds one batched restyle payload. `activeTrace === null` produces the
  // reset state; otherwise that trace (and its `activePoint`) is emphasised.
  const buildRestyle = (activeTrace, activePoint) => {
    const update = { 'opacity': [], 'marker.size': [], 'line.width': [] };
    const traceIndices = [];
    const numTraces = chart.data?.length || 0;

    for (let i = 0; i < numTraces; i++) {
      const trace = chart.data[i];
      if (!trace) continue;

      // Skip fill traces (error bands)
      if (trace.fill === 'toself') continue;

      const isActive = i === activeTrace;
      const numPoints = trace.x?.length || 0;
      const sizes = Array(numPoints).fill(6);
      if (isActive && activePoint < numPoints) sizes[activePoint] = 12;

      traceIndices.push(i);
      update['opacity'].push(activeTrace === null || isActive ? 1 : 0.4);
      update['line.width'].push(isActive ? 4 : 2);
      update['marker.size'].push(sizes);
    }

    return { update, traceIndices };
  };

  // Throttled hover handler to prevent excessive updates
  const handleHover = throttle(function (data) {
    if (!data || !data.points || !data.points[0]) return;

    const point = data.points[0];
    const traceIndex = point.curveNumber;
    const pointIndex = point.pointNumber;

    // Skip if same point or a restyle is still in flight
    if ((traceIndex === lastHoveredTrace && pointIndex === lastHoveredPoint) || isAnimating) return;

    lastHoveredTrace = traceIndex;
    lastHoveredPoint = pointIndex;
    isAnimating = true;

    const { update, traceIndices } = buildRestyle(traceIndex, pointIndex);

    // Single batched restyle call
    requestAnimationFrame(() => {
      if (traceIndices.length === 0) {
        isAnimating = false;
        return;
      }
      Plotly.restyle(chartId, update, traceIndices).then(() => {
        isAnimating = false;
      }).catch(() => {
        // Best effort: a failed restyle must not block later hovers
        isAnimating = false;
      });
    });
  }, 50); // Throttle interval in ms

  const handleUnhover = function () {
    lastHoveredTrace = null;
    lastHoveredPoint = null;

    const { update, traceIndices } = buildRestyle(null, null);

    // Single batched reset call
    if (traceIndices.length > 0) {
      requestAnimationFrame(() => {
        Plotly.restyle(chartId, update, traceIndices);
      });
    }
  };

  chart.on('plotly_hover', handleHover);
  chart.on('plotly_unhover', handleUnhover);

  // Remembered so a later call can detach exactly these handlers
  chart.__hoverHighlightHandlers = { hover: handleHover, unhover: handleUnhover };
}
1404
+
1405
+ // Apply hover effects when charts are initialized (called from init functions)
1406
/**
 * Attaches hover highlighting to every chart of one section on the next
 * animation frame.
 *
 * - 'scaling' / 'probing': one chart per scenario (`<section>-mimic`,
 *   `<section>-10k`, `<section>-globem`).
 * - 'entropy': one chart per entry of ENTROPY_MODELS (`entropy-model-<i>`).
 * - any other id: nothing happens.
 *
 * @param {string} sectionId - Section whose charts should get hover effects.
 */
function applyHoverEffectsForSection(sectionId) {
  requestAnimationFrame(() => {
    const scenarios = ['mimic', '10k', 'globem'];

    switch (sectionId) {
      case 'scaling':
      case 'probing':
        scenarios.forEach((s) => addHoverHighlight(`${sectionId}-${s}`));
        break;
      case 'entropy':
        // Follow the model list instead of a hard-coded subplot count
        ENTROPY_MODELS.forEach((_, i) => addHoverHighlight(`entropy-model-${i}`));
        break;
      default:
        break;
    }
  });
}
data.js CHANGED
The diff for this file is too large to render. See raw diff
 
index.html CHANGED
@@ -10,7 +10,38 @@
10
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
11
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
12
  <script src="https://cdn.plot.ly/plotly-2.27.0.min.js"></script>
 
 
 
13
  <link rel="stylesheet" href="styles.css">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  </head>
15
 
16
  <body>
@@ -55,6 +86,7 @@
55
  <button class="dim-btn" data-dim="token">📊 Tokens</button>
56
  <button class="dim-btn" data-dim="cost">💰 Cost</button>
57
  </div>
 
58
  <div class="charts-grid three-col">
59
  <div class="chart-card">
60
  <h3>MIMIC</h3>
@@ -122,7 +154,46 @@
122
  </div>
123
  </section>
124
 
125
- <!-- 4. Error Analysis Section -->
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  <section id="error" class="section visible">
127
  <div class="section-header">
128
  <h2>⚠️ Error Analysis</h2>
@@ -130,33 +201,30 @@
130
  </div>
131
  <div class="charts-grid single">
132
  <div class="chart-card wide">
133
- <div id="error-chart" class="chart-container"></div>
134
  </div>
135
  </div>
136
  </section>
137
 
138
- <!-- 5. Probing Results Section -->
139
  <section id="probing" class="section visible">
140
  <div class="section-header">
141
  <h2>🔍 Probing Results</h2>
142
  <p>Analyze the average log probability of FINISH messages across conversation turns and progress.</p>
143
  </div>
144
- <div class="dimension-toggle">
145
- <button class="dim-btn probing-dim active" data-mode="byTurn">📊 By Turn</button>
146
- <button class="dim-btn probing-dim" data-mode="byProgress">📈 By Progress (%)</button>
147
- </div>
148
  <div class="charts-grid three-col">
149
  <div class="chart-card">
150
  <h3>MIMIC</h3>
151
- <div id="probing-mimic" class="chart-container"></div>
152
  </div>
153
  <div class="chart-card">
154
  <h3>GLOBEM</h3>
155
- <div id="probing-globem" class="chart-container"></div>
156
  </div>
157
  <div class="chart-card">
158
  <h3>10-K</h3>
159
- <div id="probing-10k" class="chart-container"></div>
160
  </div>
161
  </div>
162
  </section>
@@ -167,8 +235,7 @@
167
  <p>DDR-Bench © 2026 | Deep Data Research Agent Benchmark</p>
168
  </footer>
169
 
170
- <script src="data.js"></script>
171
- <script src="charts.js"></script>
172
  </body>
173
 
174
  </html>
 
10
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
11
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
12
  <script src="https://cdn.plot.ly/plotly-2.27.0.min.js"></script>
13
+ <script src="data.js" defer></script>
14
+ <script src="entropy_data.js" defer></script>
15
+ <script src="charts.js" defer></script>
16
  <link rel="stylesheet" href="styles.css">
17
+ <style>
18
+ /* Inline critical CSS for chart loading states */
19
+ .chart-loading {
20
+ display: flex;
21
+ align-items: center;
22
+ justify-content: center;
23
+ min-height: 300px;
24
+ color: #86868b;
25
+ font-size: 14px;
26
+ }
27
+
28
+ .chart-loading::after {
29
+ content: 'Loading chart...';
30
+ animation: pulse 1.5s ease-in-out infinite;
31
+ }
32
+
33
+ @keyframes pulse {
34
+
35
+ 0%,
36
+ 100% {
37
+ opacity: 0.4;
38
+ }
39
+
40
+ 50% {
41
+ opacity: 1;
42
+ }
43
+ }
44
+ </style>
45
  </head>
46
 
47
  <body>
 
86
  <button class="dim-btn" data-dim="token">📊 Tokens</button>
87
  <button class="dim-btn" data-dim="cost">💰 Cost</button>
88
  </div>
89
+ <div id="scaling-legend" class="shared-legend"></div>
90
  <div class="charts-grid three-col">
91
  <div class="chart-card">
92
  <h3>MIMIC</h3>
 
154
  </div>
155
  </section>
156
 
157
+ <!-- 4. Entropy Analysis Section -->
158
+ <section id="entropy" class="section visible">
159
+ <div class="section-header">
160
+ <h2>🔬 Entropy Analysis</h2>
161
+ <p>Scatter plot showing Access Entropy vs Coverage by model. Opacity represents accuracy. Higher entropy
162
+ = more uniform access; Higher coverage = more fields explored.</p>
163
+ </div>
164
+ <div class="dimension-toggle">
165
+ <button class="toggle-btn active" data-entropy-scenario="10k">10-K</button>
166
+ <button class="toggle-btn" data-entropy-scenario="mimic">MIMIC</button>
167
+ </div>
168
+ <div class="charts-grid three-col">
169
+ <div class="chart-card">
170
+ <h3 id="entropy-model-0-title">GPT-5.2</h3>
171
+ <div id="entropy-model-0" class="chart-container-tall"></div>
172
+ </div>
173
+ <div class="chart-card">
174
+ <h3 id="entropy-model-1-title">Claude-4.5-Sonnet</h3>
175
+ <div id="entropy-model-1" class="chart-container-tall"></div>
176
+ </div>
177
+ <div class="chart-card">
178
+ <h3 id="entropy-model-2-title">Gemini-3-Flash</h3>
179
+ <div id="entropy-model-2" class="chart-container-tall"></div>
180
+ </div>
181
+ <div class="chart-card">
182
+ <h3 id="entropy-model-3-title">GLM-4.6</h3>
183
+ <div id="entropy-model-3" class="chart-container-tall"></div>
184
+ </div>
185
+ <div class="chart-card">
186
+ <h3 id="entropy-model-4-title">Qwen3-Next-80B-A3B</h3>
187
+ <div id="entropy-model-4" class="chart-container-tall"></div>
188
+ </div>
189
+ <div class="chart-card">
190
+ <h3 id="entropy-model-5-title">DeepSeek-V3.2</h3>
191
+ <div id="entropy-model-5" class="chart-container-tall"></div>
192
+ </div>
193
+ </div>
194
+ </section>
195
+
196
+ <!-- 5. Error Analysis Section -->
197
  <section id="error" class="section visible">
198
  <div class="section-header">
199
  <h2>⚠️ Error Analysis</h2>
 
201
  </div>
202
  <div class="charts-grid single">
203
  <div class="chart-card wide">
204
+ <div id="error-chart" class="chart-container-double"></div>
205
  </div>
206
  </div>
207
  </section>
208
 
209
+ <!-- 6. Probing Results Section -->
210
  <section id="probing" class="section visible">
211
  <div class="section-header">
212
  <h2>🔍 Probing Results</h2>
213
  <p>Analyze the average log probability of FINISH messages across conversation turns and progress.</p>
214
  </div>
215
+ <div id="probing-legend" class="shared-legend"></div>
 
 
 
216
  <div class="charts-grid three-col">
217
  <div class="chart-card">
218
  <h3>MIMIC</h3>
219
+ <div id="probing-mimic" class="chart-container-tall"></div>
220
  </div>
221
  <div class="chart-card">
222
  <h3>GLOBEM</h3>
223
+ <div id="probing-globem" class="chart-container-tall"></div>
224
  </div>
225
  <div class="chart-card">
226
  <h3>10-K</h3>
227
+ <div id="probing-10k" class="chart-container-tall"></div>
228
  </div>
229
  </div>
230
  </section>
 
235
  <p>DDR-Bench © 2026 | Deep Data Research Agent Benchmark</p>
236
  </footer>
237
 
238
+ <!-- Scripts loaded via defer in head for better parallelization -->
 
239
  </body>
240
 
241
  </html>
styles.css CHANGED
@@ -1,21 +1,22 @@
1
- /* Root Variables */
2
  :root {
3
- --primary: #6366f1;
4
- --primary-dark: #4f46e5;
5
- --primary-light: #818cf8;
6
- --secondary: #10b981;
7
- --accent: #f59e0b;
8
- --bg-dark: #0f172a;
9
- --bg-card: #1e293b;
10
- --bg-card-hover: #334155;
11
- --text-primary: #f1f5f9;
12
- --text-secondary: #94a3b8;
13
- --text-muted: #64748b;
14
- --border: #334155;
15
- --shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.3), 0 2px 4px -2px rgba(0, 0, 0, 0.2);
16
- --shadow-lg: 0 10px 15px -3px rgba(0, 0, 0, 0.4), 0 4px 6px -4px rgba(0, 0, 0, 0.3);
17
- --gradient-primary: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%);
18
- --gradient-hero: linear-gradient(135deg, #1e293b 0%, #0f172a 50%, #1a1f3c 100%);
 
19
  }
20
 
21
  /* Reset & Base */
@@ -27,90 +28,71 @@
27
  padding: 0;
28
  }
29
 
30
- html {
31
- scroll-behavior: smooth;
32
- }
33
-
34
  body {
35
- font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
36
- background-color: var(--bg-dark);
37
  color: var(--text-primary);
38
- line-height: 1.6;
 
39
  min-height: 100vh;
 
40
  }
41
 
42
  /* Hero Section */
43
  .hero {
44
- background: var(--gradient-hero);
45
- padding: 2rem 2rem 1.5rem;
46
  text-align: center;
47
- position: relative;
48
- overflow: hidden;
49
- }
50
-
51
- .hero::before {
52
- content: '';
53
- position: absolute;
54
- top: 0;
55
- left: 0;
56
- right: 0;
57
- bottom: 0;
58
- background:
59
- radial-gradient(circle at 20% 50%, rgba(99, 102, 241, 0.15) 0%, transparent 50%),
60
- radial-gradient(circle at 80% 50%, rgba(139, 92, 246, 0.1) 0%, transparent 50%);
61
- pointer-events: none;
62
  }
63
 
64
  .hero-content {
65
- max-width: 900px;
66
  margin: 0 auto;
67
- position: relative;
68
- z-index: 1;
69
  }
70
 
71
  .badge {
72
  display: inline-block;
73
- background: rgba(99, 102, 241, 0.2);
74
- color: var(--primary-light);
75
- padding: 0.3rem 0.7rem;
76
- border-radius: 2rem;
77
- font-size: 0.75rem;
78
- font-weight: 500;
79
- margin-bottom: 0.5rem;
80
- border: 1px solid rgba(99, 102, 241, 0.3);
81
  }
82
 
83
  .hero h1 {
84
- font-size: 2.5rem;
 
85
  font-weight: 700;
86
- background: linear-gradient(135deg, #f1f5f9 0%, #818cf8 100%);
87
- -webkit-background-clip: text;
88
- -webkit-text-fill-color: transparent;
89
- background-clip: text;
90
- margin-bottom: 0.4rem;
91
- letter-spacing: -0.02em;
92
  }
93
 
94
  .subtitle {
95
- font-size: 1rem;
96
- color: var(--text-secondary);
97
- margin-bottom: 0.5rem;
98
  font-weight: 400;
 
 
 
99
  }
100
 
101
  .description {
102
- font-size: 0.85rem;
103
- color: var(--text-muted);
 
 
 
104
  max-width: 700px;
105
- margin: 0 auto 1rem;
106
- line-height: 1.5;
107
  }
108
 
109
  .stats-row {
110
  display: flex;
111
  justify-content: center;
112
- gap: 2rem;
113
- margin-top: 1rem;
114
  }
115
 
116
  .stat-item {
@@ -119,90 +101,134 @@ body {
119
 
120
  .stat-value {
121
  display: block;
122
- font-size: 1.5rem;
123
- font-weight: 700;
124
- color: var(--primary-light);
125
  }
126
 
127
  .stat-label {
128
- font-size: 0.7rem;
129
- color: var(--text-muted);
 
130
  }
131
 
132
  /* Main Content */
133
  .content {
134
  max-width: 1800px;
 
135
  margin: 0 auto;
136
- padding: 1rem 2rem;
137
  }
138
 
139
- /* Sections - all visible */
140
  .section {
141
- display: block;
142
- margin-bottom: 2rem;
143
- padding-bottom: 1rem;
144
- border-bottom: 1px solid var(--border);
145
- }
146
-
147
- .section:last-child {
148
- border-bottom: none;
149
- margin-bottom: 0;
150
  }
151
 
152
  .section-header {
153
- margin-bottom: 1rem;
154
  text-align: center;
155
  }
156
 
157
  .section-header h2 {
158
- font-size: 1.25rem;
159
- font-weight: 600;
160
- color: var(--text-primary);
161
- margin-bottom: 0.25rem;
 
162
  }
163
 
164
  .section-header p {
165
- color: var(--text-muted);
166
- font-size: 0.8rem;
167
  }
168
 
169
- /* Dimension Toggle Buttons */
170
  .dimension-toggle {
171
  display: flex;
172
  justify-content: center;
173
- gap: 0.4rem;
174
- margin-bottom: 1rem;
175
  }
176
 
177
  .dim-btn {
178
- padding: 0.4rem 0.9rem;
179
- background: var(--bg-card);
180
- border: 1px solid var(--border);
181
- border-radius: 2rem;
182
- color: var(--text-secondary);
183
- font-size: 0.75rem;
184
- font-weight: 500;
185
  cursor: pointer;
186
  transition: all 0.2s ease;
187
  font-family: inherit;
188
  }
189
 
190
  .dim-btn:hover {
191
- background: var(--bg-card-hover);
192
- color: var(--text-primary);
193
  }
194
 
195
  .dim-btn.active {
196
- background: var(--gradient-primary);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  color: white;
198
- border-color: transparent;
199
- box-shadow: 0 2px 8px rgba(99, 102, 241, 0.3);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  }
201
 
202
  /* Charts Grid */
203
  .charts-grid {
204
  display: grid;
205
- gap: 1.25rem;
 
206
  }
207
 
208
  .charts-grid.three-col {
@@ -211,127 +237,98 @@ body {
211
 
212
  .charts-grid.single {
213
  grid-template-columns: 1fr;
214
- max-width: 900px;
215
  margin: 0 auto;
216
  }
217
 
218
- @media (max-width: 1200px) {
219
- .charts-grid.three-col {
220
- grid-template-columns: repeat(2, 1fr);
221
- }
222
- }
223
-
224
- @media (max-width: 768px) {
225
- .charts-grid.three-col {
226
- grid-template-columns: 1fr;
227
- }
228
- }
229
-
230
  /* Chart Card */
231
  .chart-card {
232
  background: var(--bg-card);
233
- border-radius: 0.5rem;
234
- padding: 0.75rem;
235
- border: 1px solid var(--border);
236
- box-shadow: var(--shadow);
 
 
 
 
237
  }
238
 
239
  .chart-card h3 {
240
- font-size: 0.85rem;
241
  font-weight: 600;
242
- color: var(--text-primary);
243
- margin-bottom: 0.5rem;
244
  text-align: center;
245
- padding-bottom: 0.4rem;
246
- border-bottom: 1px solid var(--border);
247
  }
248
 
249
  .chart-card.wide {
250
- padding: 1rem;
251
  }
252
 
253
- /* Chart Containers - Fixed heights */
254
  .chart-container {
255
- height: 350px;
 
256
  width: 100%;
257
- min-height: 350px;
258
  }
259
 
260
  .chart-container-tall {
261
- height: 500px;
 
 
 
 
 
 
 
 
262
  width: 100%;
263
- min-height: 500px;
264
  }
265
 
266
  /* Footer */
267
  .footer {
268
  text-align: center;
269
- padding: 1rem;
270
- color: var(--text-muted);
271
- font-size: 0.75rem;
 
272
  border-top: 1px solid var(--border);
273
- margin-top: 1rem;
274
  }
275
 
276
  /* Responsive */
277
- @media (max-width: 768px) {
278
- .hero {
279
- padding: 1.5rem 1rem;
280
  }
 
281
 
 
282
  .hero h1 {
283
- font-size: 1.75rem;
284
  }
285
 
286
  .subtitle {
287
- font-size: 0.9rem;
288
- }
289
-
290
- .stats-row {
291
- gap: 1rem;
292
- }
293
-
294
- .stat-value {
295
- font-size: 1.25rem;
296
- }
297
-
298
- .content {
299
- padding: 0.75rem;
300
  }
301
 
302
- .dimension-toggle {
303
- flex-wrap: wrap;
304
- }
305
-
306
- .dim-btn {
307
- padding: 0.35rem 0.7rem;
308
- font-size: 0.7rem;
309
  }
310
 
311
  .chart-container {
312
- height: 250px;
313
  }
314
 
315
  .chart-container-tall {
316
- height: 320px;
317
  }
318
  }
319
 
320
- /* Plotly overrides for dark theme */
321
  .js-plotly-plot .plotly .modebar {
322
- background: rgba(30, 41, 59, 0.9) !important;
323
- }
324
-
325
- .js-plotly-plot .plotly .modebar-btn path {
326
- fill: var(--text-secondary) !important;
327
- }
328
-
329
- .js-plotly-plot .plotly .modebar-btn:hover path {
330
- fill: var(--text-primary) !important;
331
- }
332
-
333
- /* Ensure Plotly charts don't overflow */
334
- .js-plotly-plot {
335
- width: 100% !important;
336
- height: 100% !important;
337
  }
 
1
+ /* Apple Style Minimalist Theme */
2
  :root {
3
+ --primary: #0071e3;
4
+ /* Apple Blue */
5
+ --primary-hover: #0077ed;
6
+ --bg-body: #f5f5f7;
7
+ /* Light grey background */
8
+ --bg-card: #ffffff;
9
+ --text-primary: #1d1d1f;
10
+ /* Apple Black */
11
+ --text-secondary: #515154;
12
+ /* Darker grey */
13
+ --border: #d2d2d7;
14
+ --shadow-card: 0 8px 30px rgba(0, 0, 0, 0.08);
15
+ /* Stronger shadow */
16
+ --radius-card: 20px;
17
+ --radius-btn: 980px;
18
+ /* Capsule */
19
+ --font-stack: "SF Pro Text", "SF Pro Icons", "Helvetica Neue", "Helvetica", "Arial", sans-serif;
20
  }
21
 
22
  /* Reset & Base */
 
28
  padding: 0;
29
  }
30
 
 
 
 
 
31
  body {
32
+ font-family: var(--font-stack);
33
+ background-color: var(--bg-body);
34
  color: var(--text-primary);
35
+ line-height: 1.47059;
36
+ letter-spacing: -0.022em;
37
  min-height: 100vh;
38
+ -webkit-font-smoothing: antialiased;
39
  }
40
 
41
  /* Hero Section */
42
  .hero {
43
+ padding: 4rem 2rem 2rem;
 
44
  text-align: center;
45
+ background: var(--bg-body);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  }
47
 
48
  .hero-content {
49
+ max-width: 980px;
50
  margin: 0 auto;
 
 
51
  }
52
 
53
  .badge {
54
  display: inline-block;
55
+ color: var(--primary);
56
+ font-size: 12px;
57
+ font-weight: 600;
58
+ margin-bottom: 0.8rem;
59
+ letter-spacing: 0.05em;
60
+ text-transform: uppercase;
 
 
61
  }
62
 
63
  .hero h1 {
64
+ font-size: 48px;
65
+ line-height: 1.08349;
66
  font-weight: 700;
67
+ letter-spacing: -0.003em;
68
+ margin-bottom: 0.5rem;
69
+ color: var(--text-primary);
 
 
 
70
  }
71
 
72
  .subtitle {
73
+ font-size: 24px;
74
+ line-height: 1.16667;
 
75
  font-weight: 400;
76
+ letter-spacing: 0.009em;
77
+ color: var(--text-primary);
78
+ margin-bottom: 1rem;
79
  }
80
 
81
  .description {
82
+ font-size: 17px;
83
+ line-height: 1.47059;
84
+ font-weight: 400;
85
+ letter-spacing: -0.022em;
86
+ color: var(--text-secondary);
87
  max-width: 700px;
88
+ margin: 0 auto 2rem;
 
89
  }
90
 
91
  .stats-row {
92
  display: flex;
93
  justify-content: center;
94
+ gap: 3rem;
95
+ margin-top: 2rem;
96
  }
97
 
98
  .stat-item {
 
101
 
102
  .stat-value {
103
  display: block;
104
+ font-size: 28px;
105
+ font-weight: 600;
106
+ color: var(--text-primary);
107
  }
108
 
109
  .stat-label {
110
+ font-size: 13px;
111
+ color: var(--text-secondary);
112
+ font-weight: 500;
113
  }
114
 
115
  /* Main Content */
116
  .content {
117
  max-width: 1800px;
118
+ /* Maximize width for charts */
119
  margin: 0 auto;
120
+ padding: 2rem;
121
  }
122
 
123
+ /* Sections */
124
  .section {
125
+ margin-bottom: 4rem;
 
 
 
 
 
 
 
 
126
  }
127
 
128
  .section-header {
129
+ margin-bottom: 2rem;
130
  text-align: center;
131
  }
132
 
133
  .section-header h2 {
134
+ font-size: 32px;
135
+ line-height: 1.125;
136
+ font-weight: 700;
137
+ letter-spacing: 0.004em;
138
+ margin-bottom: 0.5rem;
139
  }
140
 
141
  .section-header p {
142
+ font-size: 17px;
143
+ color: var(--text-secondary);
144
  }
145
 
146
+ /* Toggle Buttons */
147
  .dimension-toggle {
148
  display: flex;
149
  justify-content: center;
150
+ gap: 1rem;
151
+ margin-bottom: 1.5rem;
152
  }
153
 
154
  .dim-btn {
155
+ padding: 8px 16px;
156
+ background: rgba(0, 0, 0, 0.05);
157
+ border: none;
158
+ border-radius: var(--radius-btn);
159
+ color: var(--text-primary);
160
+ font-size: 14px;
161
+ font-weight: 400;
162
  cursor: pointer;
163
  transition: all 0.2s ease;
164
  font-family: inherit;
165
  }
166
 
167
  .dim-btn:hover {
168
+ background: rgba(0, 0, 0, 0.1);
 
169
  }
170
 
171
  .dim-btn.active {
172
+ background: var(--text-primary);
173
+ /* Black active state like Apple */
174
+ color: white;
175
+ }
176
+
177
+ .toggle-btn {
178
+ padding: 10px 20px;
179
+ background: rgba(0, 0, 0, 0.05);
180
+ border: none;
181
+ border-radius: var(--radius-btn);
182
+ color: var(--text-primary);
183
+ font-size: 14px;
184
+ font-weight: 500;
185
+ cursor: pointer;
186
+ transition: all 0.3s ease;
187
+ font-family: inherit;
188
+ }
189
+
190
+ .toggle-btn:hover {
191
+ background: rgba(0, 0, 0, 0.12);
192
+ }
193
+
194
+ .toggle-btn.active {
195
+ background: var(--text-primary);
196
  color: white;
197
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15);
198
+ }
199
+
200
+ /* Shared Legend for Scaling and Probing */
201
+ .shared-legend {
202
+ display: flex;
203
+ justify-content: center;
204
+ flex-wrap: wrap;
205
+ gap: 1.5rem;
206
+ margin-bottom: 1.5rem;
207
+ padding: 1rem;
208
+ background: var(--bg-card);
209
+ border-radius: 12px;
210
+ box-shadow: var(--shadow-card);
211
+ }
212
+
213
+ .legend-item {
214
+ display: flex;
215
+ align-items: center;
216
+ gap: 0.5rem;
217
+ font-size: 13px;
218
+ color: var(--text-primary);
219
+ }
220
+
221
+ .legend-color {
222
+ width: 24px;
223
+ height: 3px;
224
+ border-radius: 2px;
225
  }
226
 
227
  /* Charts Grid */
228
  .charts-grid {
229
  display: grid;
230
+ gap: 16px;
231
+ /* Tighter gap */
232
  }
233
 
234
  .charts-grid.three-col {
 
237
 
238
  .charts-grid.single {
239
  grid-template-columns: 1fr;
240
+ max-width: 1000px;
241
  margin: 0 auto;
242
  }
243
 
 
 
 
 
 
 
 
 
 
 
 
 
244
  /* Chart Card */
245
  .chart-card {
246
  background: var(--bg-card);
247
+ border-radius: var(--radius-card);
248
+ padding: 24px;
249
+ box-shadow: var(--shadow-card);
250
+ transition: transform 0.3s ease, box-shadow 0.3s ease;
251
+ }
252
+
253
+ .chart-card:hover {
254
+ box-shadow: 0 8px 24px rgba(0, 0, 0, 0.08);
255
  }
256
 
257
  .chart-card h3 {
258
+ font-size: 14px;
259
  font-weight: 600;
260
+ color: var(--text-secondary);
261
+ margin-bottom: 1rem;
262
  text-align: center;
263
+ text-transform: uppercase;
264
+ letter-spacing: 0.05em;
265
  }
266
 
267
  .chart-card.wide {
268
+ padding: 32px;
269
  }
270
 
271
+ /* Chart Containers */
272
  .chart-container {
273
+ height: 300px;
274
+ /* Reduced height */
275
  width: 100%;
276
+ transition: opacity 0.3s ease;
277
  }
278
 
279
  .chart-container-tall {
280
+ height: 450px;
281
+ /* Reduced height */
282
+ transition: opacity 0.3s ease;
283
+ width: 100%;
284
+ }
285
+
286
+ .chart-container-double {
287
+ height: 600px;
288
+ /* Double height for error analysis */
289
  width: 100%;
 
290
  }
291
 
292
  /* Footer */
293
  .footer {
294
  text-align: center;
295
+ padding: 3rem 1rem;
296
+ color: var(--text-secondary);
297
+ font-size: 12px;
298
+ background: var(--bg-body);
299
  border-top: 1px solid var(--border);
 
300
  }
301
 
302
  /* Responsive */
303
+ @media (max-width: 1024px) {
304
+ .charts-grid.three-col {
305
+ grid-template-columns: repeat(2, 1fr);
306
  }
307
+ }
308
 
309
+ @media (max-width: 768px) {
310
  .hero h1 {
311
+ font-size: 36px;
312
  }
313
 
314
  .subtitle {
315
+ font-size: 20px;
 
 
 
 
 
 
 
 
 
 
 
 
316
  }
317
 
318
+ .charts-grid.three-col {
319
+ grid-template-columns: 1fr;
 
 
 
 
 
320
  }
321
 
322
  .chart-container {
323
+ height: 300px;
324
  }
325
 
326
  .chart-container-tall {
327
+ height: 400px;
328
  }
329
  }
330
 
331
+ /* Plotly Overrides */
332
  .js-plotly-plot .plotly .modebar {
333
+ display: none !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
  }