luisrguerra commited on
Commit
7780541
·
verified ·
1 Parent(s): cc187e8

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +59 -6
index.html CHANGED
@@ -154,6 +154,14 @@
154
  </ul>
155
  <h4>Versions of models already surpassed by fine-tune, new versions or new architectures:</h4>
156
  <ul>
 
 
 
 
 
 
 
 
157
  <li>gpt-4-0314</li>
158
  <li>Claude 2-2.1</li>
159
  <li>Claude Instant 1-1.2</li>
@@ -423,10 +431,58 @@
423
  license: 'Proprietary',
424
  },
425
  {
426
- name: 'Gemini Pro 1.5',
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
427
  mmlu: 81.9,
428
  mtbench: null,
429
- arenaelo:null,
430
  gsm8k: 91.7,
431
  winogrande: null,
432
  truthfulqa: null,
@@ -690,8 +746,6 @@
690
  function getDataSetRadar(data) {
691
  const mmluMaxValue = getBenchmarkMaxValue("mmlu",data);
692
  const mmluMultiplier = 100/mmluMaxValue;
693
- const mtbenchMaxValue = getBenchmarkMaxValue("mtbench",data);
694
- const mtbenchMultiplier = 100/mtbenchMaxValue;
695
  const arenaeloMaxValue = getBenchmarkMaxValue("arenaelo",data);
696
  const arenaeloMultiplier = 100/arenaeloMaxValue;
697
  const gsm8kMaxValue = getBenchmarkMaxValue("gsm8k",data);
@@ -714,7 +768,6 @@
714
  label: data[i].name,
715
  data: [
716
  (data[i].mmlu*mmluMultiplier),
717
- (data[i].mtbench*mtbenchMultiplier),
718
  (data[i].arenaelo*arenaeloMultiplier),
719
  (data[i].gsm8k*gsm8kMultiplier),
720
  (data[i].winogrande*winograndeMultiplier),
@@ -731,7 +784,7 @@
731
  }
732
  const dataSetRadar = getDataSetRadar(benchmarkData);
733
  let data = {
734
- labels: ['MMLU', 'MT-bench','Arena Elo','GSM8k','Winogrande','TruthfulQA','HellaSwag','ARC','AlpacaEval','Not Hallucination'],
735
  datasets: getDataSetRadar(benchmarkData)
736
  };
737
 
 
154
  </ul>
155
  <h4>Versions of models already surpassed by fine-tune, new versions or new architectures:</h4>
156
  <ul>
157
+ <li>Gemini Pro 1.0</li>
158
+ <li>Grok 1</li>
159
+ <li>DBRX Instruct</li>
160
+ <li>Mistral Medium</li>
161
+ <li>Gemma 1.0 7B</li>
162
+ <li>Zephyr-ORPO-141b-A35b-v0.1</li>
163
+ <li>Yi 1.0 34B</li>
164
+ <li>gpt-4-0613</li>
165
  <li>gpt-4-0314</li>
166
  <li>Claude 2-2.1</li>
167
  <li>Claude Instant 1-1.2</li>
 
431
  license: 'Proprietary',
432
  },
433
  {
434
+ name: 'Gemini-Advanced-0514',
435
+ mmlu: null,
436
+ mtbench: null,
437
+ arenaelo:1267,
438
+ gsm8k: null,
439
+ winogrande: null,
440
+ truthfulqa: null,
441
+ hellaswag:null,
442
+ arc:null,
443
+ nothallucination: null,
444
+ alpacaeval: null,
445
+ parameters: null,
446
+ organization: 'Google',
447
+ license: 'Proprietary',
448
+ },
449
+ {
450
+ name: 'Gemini-1.5-Flash-API-0514',
451
+ mmlu: 78.9,
452
+ mtbench: null,
453
+ arenaelo:1230,
454
+ gsm8k: null,
455
+ winogrande: null,
456
+ truthfulqa: null,
457
+ hellaswag:null,
458
+ arc:null,
459
+ nothallucination: null,
460
+ alpacaeval: null,
461
+ parameters: null,
462
+ organization: 'Google',
463
+ license: 'Proprietary',
464
+ },
465
+ {
466
+ name: 'Gemini-1.5-Pro-API-0514',
467
+ mmlu: 85.9,
468
+ mtbench: null,
469
+ arenaelo:1265,
470
+ gsm8k: null,
471
+ winogrande: null,
472
+ truthfulqa: null,
473
+ hellaswag:null,
474
+ arc:null,
475
+ nothallucination: null,
476
+ alpacaeval: null,
477
+ parameters: null,
478
+ organization: 'Google',
479
+ license: 'Proprietary',
480
+ },
481
+ {
482
+ name: 'Gemini-1.5-Pro-API-0409-Preview',
483
  mmlu: 81.9,
484
  mtbench: null,
485
+ arenaelo:1258,
486
  gsm8k: 91.7,
487
  winogrande: null,
488
  truthfulqa: null,
 
746
  function getDataSetRadar(data) {
747
  const mmluMaxValue = getBenchmarkMaxValue("mmlu",data);
748
  const mmluMultiplier = 100/mmluMaxValue;
 
 
749
  const arenaeloMaxValue = getBenchmarkMaxValue("arenaelo",data);
750
  const arenaeloMultiplier = 100/arenaeloMaxValue;
751
  const gsm8kMaxValue = getBenchmarkMaxValue("gsm8k",data);
 
768
  label: data[i].name,
769
  data: [
770
  (data[i].mmlu*mmluMultiplier),
 
771
  (data[i].arenaelo*arenaeloMultiplier),
772
  (data[i].gsm8k*gsm8kMultiplier),
773
  (data[i].winogrande*winograndeMultiplier),
 
784
  }
785
  const dataSetRadar = getDataSetRadar(benchmarkData);
786
  let data = {
787
+ labels: ['MMLU','Arena Elo','GSM8k','Winogrande','TruthfulQA','HellaSwag','ARC','AlpacaEval','Not Hallucination'],
788
  datasets: getDataSetRadar(benchmarkData)
789
  };
790