sequelbox committed on
Commit
2bcf208
1 Parent(s): 3602f94
Files changed (1) hide show
  1. README.md +91 -23
README.md CHANGED
@@ -32,7 +32,6 @@ datasets:
32
  - sequelbox/Spurline
33
  - sequelbox/Supernova
34
  model_type: llama
35
- license: llama3.1
36
  model-index:
37
  - name: Llama3.1-8B-ShiningValiant2
38
  results:
@@ -60,30 +59,111 @@ model-index:
60
  - type: acc
61
  value: 68.75
62
  name: acc
 
 
 
 
 
 
 
 
 
63
  - type: acc
64
  value: 73.23
65
  name: acc
 
 
 
 
 
 
 
 
 
66
  - type: acc
67
- value: 46.0
68
  name: acc
 
 
 
 
 
 
 
 
 
69
  - type: acc
70
  value: 44.33
71
  name: acc
 
 
 
 
 
 
 
 
 
72
  - type: acc
73
  value: 53.19
74
  name: acc
 
 
 
 
 
 
 
 
 
75
  - type: acc
76
  value: 37.25
77
  name: acc
 
 
 
 
 
 
 
 
 
78
  - type: acc
79
  value: 42.38
80
  name: acc
 
 
 
 
 
 
 
 
 
81
  - type: acc
82
- value: 56.0
83
  name: acc
 
 
 
 
 
 
 
 
 
84
  - type: acc
85
- value: 63.0
86
  name: acc
 
 
 
 
 
 
 
 
 
87
  - type: acc
88
  value: 63.16
89
  name: acc
@@ -97,7 +177,7 @@ model-index:
97
  num_few_shot: 0
98
  metrics:
99
  - type: inst_level_strict_acc and prompt_level_strict_acc
100
- value: 65.24
101
  name: strict accuracy
102
  source:
103
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
@@ -127,7 +207,7 @@ model-index:
127
  num_few_shot: 4
128
  metrics:
129
  - type: exact_match
130
- value: 11.63
131
  name: exact match
132
  source:
133
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
@@ -142,7 +222,7 @@ model-index:
142
  num_few_shot: 0
143
  metrics:
144
  - type: acc_norm
145
- value: 8.95
146
  name: acc_norm
147
  source:
148
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
@@ -157,7 +237,7 @@ model-index:
157
  num_few_shot: 0
158
  metrics:
159
  - type: acc_norm
160
- value: 7.19
161
  name: acc_norm
162
  source:
163
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
@@ -174,11 +254,12 @@ model-index:
174
  num_few_shot: 5
175
  metrics:
176
  - type: acc
177
- value: 26.38
178
  name: accuracy
179
  source:
180
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
181
  name: Open LLM Leaderboard
 
182
  ---
183
 
184
 
@@ -252,17 +333,4 @@ Shining Valiant 2 is created by [Valiant Labs.](http://valiantlabs.ca/)
252
  We care about open source.
253
  For everyone to use.
254
 
255
- We encourage others to finetune further from our models.
256
- # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
257
- Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/ValiantLabs__Llama3.1-8B-ShiningValiant2-details)
258
-
259
- | Metric |Value|
260
- |-------------------|----:|
261
- |Avg. |24.37|
262
- |IFEval (0-Shot) |64.96|
263
- |BBH (3-Shot) |26.35|
264
- |MATH Lvl 5 (4-Shot)|12.92|
265
- |GPQA (0-shot) | 8.05|
266
- |MuSR (0-shot) | 7.46|
267
- |MMLU-PRO (5-shot) |26.46|
268
-
 
32
  - sequelbox/Spurline
33
  - sequelbox/Supernova
34
  model_type: llama
 
35
  model-index:
36
  - name: Llama3.1-8B-ShiningValiant2
37
  results:
 
59
  - type: acc
60
  value: 68.75
61
  name: acc
62
+ - task:
63
+ type: text-generation
64
+ name: Text Generation
65
+ dataset:
66
+ name: MMLU High School Biology (5-Shot)
67
+ type: MMLU
68
+ args:
69
+ num_few_shot: 5
70
+ metrics:
71
  - type: acc
72
  value: 73.23
73
  name: acc
74
+ - task:
75
+ type: text-generation
76
+ name: Text Generation
77
+ dataset:
78
+ name: MMLU College Chemistry (5-Shot)
79
+ type: MMLU
80
+ args:
81
+ num_few_shot: 5
82
+ metrics:
83
  - type: acc
84
+ value: 46.00
85
  name: acc
86
+ - task:
87
+ type: text-generation
88
+ name: Text Generation
89
+ dataset:
90
+ name: MMLU High School Chemistry (5-Shot)
91
+ type: MMLU
92
+ args:
93
+ num_few_shot: 5
94
+ metrics:
95
  - type: acc
96
  value: 44.33
97
  name: acc
98
+ - task:
99
+ type: text-generation
100
+ name: Text Generation
101
+ dataset:
102
+ name: MMLU Conceptual Physics (5-Shot)
103
+ type: MMLU
104
+ args:
105
+ num_few_shot: 5
106
+ metrics:
107
  - type: acc
108
  value: 53.19
109
  name: acc
110
+ - task:
111
+ type: text-generation
112
+ name: Text Generation
113
+ dataset:
114
+ name: MMLU College Physics (5-Shot)
115
+ type: MMLU
116
+ args:
117
+ num_few_shot: 5
118
+ metrics:
119
  - type: acc
120
  value: 37.25
121
  name: acc
122
+ - task:
123
+ type: text-generation
124
+ name: Text Generation
125
+ dataset:
126
+ name: MMLU High School Physics (5-Shot)
127
+ type: MMLU
128
+ args:
129
+ num_few_shot: 5
130
+ metrics:
131
  - type: acc
132
  value: 42.38
133
  name: acc
134
+ - task:
135
+ type: text-generation
136
+ name: Text Generation
137
+ dataset:
138
+ name: MMLU College Computer Science (5-Shot)
139
+ type: MMLU
140
+ args:
141
+ num_few_shot: 5
142
+ metrics:
143
  - type: acc
144
+ value: 56.00
145
  name: acc
146
+ - task:
147
+ type: text-generation
148
+ name: Text Generation
149
+ dataset:
150
+ name: MMLU High School Computer Science (5-Shot)
151
+ type: MMLU
152
+ args:
153
+ num_few_shot: 5
154
+ metrics:
155
  - type: acc
156
+ value: 63.00
157
  name: acc
158
+ - task:
159
+ type: text-generation
160
+ name: Text Generation
161
+ dataset:
162
+ name: MMLU Astronomy (5-shot)
163
+ type: MMLU
164
+ args:
165
+ num_few_shot: 5
166
+ metrics:
167
  - type: acc
168
  value: 63.16
169
  name: acc
 
177
  num_few_shot: 0
178
  metrics:
179
  - type: inst_level_strict_acc and prompt_level_strict_acc
180
+ value: 64.96
181
  name: strict accuracy
182
  source:
183
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
 
207
  num_few_shot: 4
208
  metrics:
209
  - type: exact_match
210
+ value: 12.92
211
  name: exact match
212
  source:
213
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
 
222
  num_few_shot: 0
223
  metrics:
224
  - type: acc_norm
225
+ value: 8.05
226
  name: acc_norm
227
  source:
228
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
 
237
  num_few_shot: 0
238
  metrics:
239
  - type: acc_norm
240
+ value: 7.46
241
  name: acc_norm
242
  source:
243
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
 
254
  num_few_shot: 5
255
  metrics:
256
  - type: acc
257
+ value: 26.46
258
  name: accuracy
259
  source:
260
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
261
  name: Open LLM Leaderboard
262
+ license: llama3.1
263
  ---
264
 
265
 
 
333
  We care about open source.
334
  For everyone to use.
335
 
336
+ We encourage others to finetune further from our models.