qnguyen3 committed on
Commit
882bf73
1 Parent(s): 0e17302

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +19 -2
README.md CHANGED
@@ -79,8 +79,9 @@ Nous Benchmark:
79
  |---------------------------------------------------|------:|------:|---------:|-------:|------:|
80
  |[Master-Yi-9B](https://huggingface.co/qnguyen3/Master-Yi-9B)| 43.55| 71.48| 48.54| 41.43| 51.25|
81
 
82
- ```
83
  ### AGIEval
 
84
  | Task |Version| Metric |Value| |Stderr|
85
  |------------------------------|------:|--------|----:|---|-----:|
86
  |agieval_aqua_rat | 0|acc |35.83|± | 3.01|
@@ -101,8 +102,10 @@ Nous Benchmark:
101
  | | |acc_norm|47.73|± | 3.38|
102
 
103
  Average: 43.55%
 
104
 
105
  ### GPT4All
 
106
  | Task |Version| Metric |Value| |Stderr|
107
  |-------------|------:|--------|----:|---|-----:|
108
  |arc_challenge| 0|acc |54.95|± | 1.45|
@@ -119,16 +122,20 @@ Average: 43.55%
119
  |winogrande | 0|acc |72.61|± | 1.25|
120
 
121
  Average: 71.48%
 
122
 
123
  ### TruthfulQA
 
124
  | Task |Version|Metric|Value| |Stderr|
125
  |-------------|------:|------|----:|---|-----:|
126
  |truthfulqa_mc| 1|mc1 |33.05|± | 1.65|
127
  | | |mc2 |48.54|± | 1.54|
128
 
129
  Average: 48.54%
 
130
 
131
  ### Bigbench
 
132
  | Task |Version| Metric |Value| |Stderr|
133
  |------------------------------------------------|------:|---------------------|----:|---|-----:|
134
  |bigbench_causal_judgement | 0|multiple_choice_grade|54.74|± | 3.62|
@@ -162,8 +169,8 @@ OpenLLM Benchmark:
162
  |---------------------------------------------------|---:|--------:|----:|---------:|---------:|----:|------:|
163
  |[Master-Yi-9B](https://huggingface.co/qnguyen3/Master-Yi-9B)|61.6| 79.89|69.95| 48.59| 77.35|67.48| 67.48|
164
 
165
- ```
166
  ### ARC
 
167
  | Task |Version| Metric | Value | |Stderr|
168
  |-------------|------:|--------------------|-------------|---|------|
169
  |arc_challenge| 1|acc,none | 0.59| | |
@@ -173,8 +180,10 @@ OpenLLM Benchmark:
173
  | | |alias |arc_challenge| | |
174
 
175
  Average: 61.6%
 
176
 
177
  ### HellaSwag
 
178
  | Task |Version| Metric | Value | |Stderr|
179
  |---------|------:|--------------------|---------|---|------|
180
  |hellaswag| 1|acc,none | 0.61| | |
@@ -184,8 +193,10 @@ Average: 61.6%
184
  | | |alias |hellaswag| | |
185
 
186
  Average: 79.89%
 
187
 
188
  ### MMLU
 
189
  | Task |Version| Metric | Value | |Stderr|
190
  |----------------------------------------|-------|---------------|---------------------------------------|---|------|
191
  |mmlu |N/A |acc,none | 0.7| | |
@@ -376,8 +387,10 @@ Average: 79.89%
376
  | | |acc_stderr,none|0.03 | | |
377
 
378
  Average: 69.95%
 
379
 
380
  ### TruthfulQA
 
381
  | Task |Version| Metric | Value | |Stderr|
382
  |--------------|-------|-----------------------|-----------------|---|------|
383
  |truthfulqa |N/A |bleu_acc,none | 0.45| | |
@@ -440,8 +453,10 @@ Average: 69.95%
440
  | | |alias | - truthfulqa_mc2| | |
441
 
442
  Average: 48.59%
 
443
 
444
  ### Winogrande
 
445
  | Task |Version| Metric | Value | |Stderr|
446
  |----------|------:|---------------|----------|---|------|
447
  |winogrande| 1|acc,none | 0.77| | |
@@ -449,8 +464,10 @@ Average: 48.59%
449
  | | |alias |winogrande| | |
450
 
451
  Average: 77.35%
 
452
 
453
  ### GSM8K
 
454
  |Task |Version| Metric |Value| |Stderr|
455
  |-----|------:|-----------------------------------|-----|---|------|
456
  |gsm8k| 3|exact_match,strict-match | 0.67| | |
 
79
  |---------------------------------------------------|------:|------:|---------:|-------:|------:|
80
  |[Master-Yi-9B](https://huggingface.co/qnguyen3/Master-Yi-9B)| 43.55| 71.48| 48.54| 41.43| 51.25|
81
 
82
+
83
  ### AGIEval
84
+ ```
85
  | Task |Version| Metric |Value| |Stderr|
86
  |------------------------------|------:|--------|----:|---|-----:|
87
  |agieval_aqua_rat | 0|acc |35.83|± | 3.01|
 
102
  | | |acc_norm|47.73|± | 3.38|
103
 
104
  Average: 43.55%
105
+ ```
106
 
107
  ### GPT4All
108
+ ```
109
  | Task |Version| Metric |Value| |Stderr|
110
  |-------------|------:|--------|----:|---|-----:|
111
  |arc_challenge| 0|acc |54.95|± | 1.45|
 
122
  |winogrande | 0|acc |72.61|± | 1.25|
123
 
124
  Average: 71.48%
125
+ ```
126
 
127
  ### TruthfulQA
128
+ ```
129
  | Task |Version|Metric|Value| |Stderr|
130
  |-------------|------:|------|----:|---|-----:|
131
  |truthfulqa_mc| 1|mc1 |33.05|± | 1.65|
132
  | | |mc2 |48.54|± | 1.54|
133
 
134
  Average: 48.54%
135
+ ```
136
 
137
  ### Bigbench
138
+ ```
139
  | Task |Version| Metric |Value| |Stderr|
140
  |------------------------------------------------|------:|---------------------|----:|---|-----:|
141
  |bigbench_causal_judgement | 0|multiple_choice_grade|54.74|± | 3.62|
 
169
  |---------------------------------------------------|---:|--------:|----:|---------:|---------:|----:|------:|
170
  |[Master-Yi-9B](https://huggingface.co/qnguyen3/Master-Yi-9B)|61.6| 79.89|69.95| 48.59| 77.35|67.48| 67.48|
171
 
 
172
  ### ARC
173
+ ```
174
  | Task |Version| Metric | Value | |Stderr|
175
  |-------------|------:|--------------------|-------------|---|------|
176
  |arc_challenge| 1|acc,none | 0.59| | |
 
180
  | | |alias |arc_challenge| | |
181
 
182
  Average: 61.6%
183
+ ```
184
 
185
  ### HellaSwag
186
+ ```
187
  | Task |Version| Metric | Value | |Stderr|
188
  |---------|------:|--------------------|---------|---|------|
189
  |hellaswag| 1|acc,none | 0.61| | |
 
193
  | | |alias |hellaswag| | |
194
 
195
  Average: 79.89%
196
+ ```
197
 
198
  ### MMLU
199
+ ```
200
  | Task |Version| Metric | Value | |Stderr|
201
  |----------------------------------------|-------|---------------|---------------------------------------|---|------|
202
  |mmlu |N/A |acc,none | 0.7| | |
 
387
  | | |acc_stderr,none|0.03 | | |
388
 
389
  Average: 69.95%
390
+ ```
391
 
392
  ### TruthfulQA
393
+ ```
394
  | Task |Version| Metric | Value | |Stderr|
395
  |--------------|-------|-----------------------|-----------------|---|------|
396
  |truthfulqa |N/A |bleu_acc,none | 0.45| | |
 
453
  | | |alias | - truthfulqa_mc2| | |
454
 
455
  Average: 48.59%
456
+ ```
457
 
458
  ### Winogrande
459
+ ```
460
  | Task |Version| Metric | Value | |Stderr|
461
  |----------|------:|---------------|----------|---|------|
462
  |winogrande| 1|acc,none | 0.77| | |
 
464
  | | |alias |winogrande| | |
465
 
466
  Average: 77.35%
467
+ ```
468
 
469
  ### GSM8K
470
+ ```
471
  |Task |Version| Metric |Value| |Stderr|
472
  |-----|------:|-----------------------------------|-----|---|------|
473
  |gsm8k| 3|exact_match,strict-match | 0.67| | |