vicgalle leaderboard-pr-bot committed on
Commit
803651e
1 Parent(s): 2d449e6

Adding Evaluation Results (#3)

Browse files

- Adding Evaluation Results (5ae7316b931c7e40d1cc2eb43872c234dd69ccde)


Co-authored-by: Open LLM Leaderboard PR Bot <leaderboard-pr-bot@users.noreply.huggingface.co>

Files changed (1) hide show
  1. README.md +114 -15
README.md CHANGED
@@ -1,6 +1,8 @@
1
  ---
2
  license: apache-2.0
3
  library_name: transformers
 
 
4
  model-index:
5
  - name: Configurable-Yi-1.5-9B-Chat
6
  results:
@@ -19,8 +21,7 @@ model-index:
19
  value: 64.16
20
  name: normalized accuracy
21
  source:
22
- url: >-
23
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/Configurable-Yi-1.5-9B-Chat
24
  name: Open LLM Leaderboard
25
  - task:
26
  type: text-generation
@@ -36,8 +37,7 @@ model-index:
36
  value: 81.7
37
  name: normalized accuracy
38
  source:
39
- url: >-
40
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/Configurable-Yi-1.5-9B-Chat
41
  name: Open LLM Leaderboard
42
  - task:
43
  type: text-generation
@@ -54,8 +54,7 @@ model-index:
54
  value: 70.99
55
  name: accuracy
56
  source:
57
- url: >-
58
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/Configurable-Yi-1.5-9B-Chat
59
  name: Open LLM Leaderboard
60
  - task:
61
  type: text-generation
@@ -71,8 +70,7 @@ model-index:
71
  - type: mc2
72
  value: 58.75
73
  source:
74
- url: >-
75
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/Configurable-Yi-1.5-9B-Chat
76
  name: Open LLM Leaderboard
77
  - task:
78
  type: text-generation
@@ -89,8 +87,7 @@ model-index:
89
  value: 76.8
90
  name: accuracy
91
  source:
92
- url: >-
93
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/Configurable-Yi-1.5-9B-Chat
94
  name: Open LLM Leaderboard
95
  - task:
96
  type: text-generation
@@ -107,11 +104,100 @@ model-index:
107
  value: 70.58
108
  name: accuracy
109
  source:
110
- url: >-
111
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/Configurable-Yi-1.5-9B-Chat
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  name: Open LLM Leaderboard
113
- datasets:
114
- - vicgalle/configurable-system-prompt-multitask
115
  ---
116
 
117
 
@@ -217,4 +303,17 @@ If you find this work, data and/or models useful for your research, please consi
217
  archivePrefix={arXiv},
218
  primaryClass={cs.CL}
219
  }
220
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: apache-2.0
3
  library_name: transformers
4
+ datasets:
5
+ - vicgalle/configurable-system-prompt-multitask
6
  model-index:
7
  - name: Configurable-Yi-1.5-9B-Chat
8
  results:
 
21
  value: 64.16
22
  name: normalized accuracy
23
  source:
24
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/Configurable-Yi-1.5-9B-Chat
 
25
  name: Open LLM Leaderboard
26
  - task:
27
  type: text-generation
 
37
  value: 81.7
38
  name: normalized accuracy
39
  source:
40
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/Configurable-Yi-1.5-9B-Chat
 
41
  name: Open LLM Leaderboard
42
  - task:
43
  type: text-generation
 
54
  value: 70.99
55
  name: accuracy
56
  source:
57
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/Configurable-Yi-1.5-9B-Chat
 
58
  name: Open LLM Leaderboard
59
  - task:
60
  type: text-generation
 
70
  - type: mc2
71
  value: 58.75
72
  source:
73
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/Configurable-Yi-1.5-9B-Chat
 
74
  name: Open LLM Leaderboard
75
  - task:
76
  type: text-generation
 
87
  value: 76.8
88
  name: accuracy
89
  source:
90
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/Configurable-Yi-1.5-9B-Chat
 
91
  name: Open LLM Leaderboard
92
  - task:
93
  type: text-generation
 
104
  value: 70.58
105
  name: accuracy
106
  source:
107
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/Configurable-Yi-1.5-9B-Chat
108
+ name: Open LLM Leaderboard
109
+ - task:
110
+ type: text-generation
111
+ name: Text Generation
112
+ dataset:
113
+ name: IFEval (0-Shot)
114
+ type: HuggingFaceH4/ifeval
115
+ args:
116
+ num_few_shot: 0
117
+ metrics:
118
+ - type: inst_level_strict_acc and prompt_level_strict_acc
119
+ value: 43.23
120
+ name: strict accuracy
121
+ source:
122
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=vicgalle/Configurable-Yi-1.5-9B-Chat
123
+ name: Open LLM Leaderboard
124
+ - task:
125
+ type: text-generation
126
+ name: Text Generation
127
+ dataset:
128
+ name: BBH (3-Shot)
129
+ type: BBH
130
+ args:
131
+ num_few_shot: 3
132
+ metrics:
133
+ - type: acc_norm
134
+ value: 35.33
135
+ name: normalized accuracy
136
+ source:
137
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=vicgalle/Configurable-Yi-1.5-9B-Chat
138
+ name: Open LLM Leaderboard
139
+ - task:
140
+ type: text-generation
141
+ name: Text Generation
142
+ dataset:
143
+ name: MATH Lvl 5 (4-Shot)
144
+ type: hendrycks/competition_math
145
+ args:
146
+ num_few_shot: 4
147
+ metrics:
148
+ - type: exact_match
149
+ value: 6.12
150
+ name: exact match
151
+ source:
152
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=vicgalle/Configurable-Yi-1.5-9B-Chat
153
+ name: Open LLM Leaderboard
154
+ - task:
155
+ type: text-generation
156
+ name: Text Generation
157
+ dataset:
158
+ name: GPQA (0-shot)
159
+ type: Idavidrein/gpqa
160
+ args:
161
+ num_few_shot: 0
162
+ metrics:
163
+ - type: acc_norm
164
+ value: 12.42
165
+ name: acc_norm
166
+ source:
167
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=vicgalle/Configurable-Yi-1.5-9B-Chat
168
+ name: Open LLM Leaderboard
169
+ - task:
170
+ type: text-generation
171
+ name: Text Generation
172
+ dataset:
173
+ name: MuSR (0-shot)
174
+ type: TAUR-Lab/MuSR
175
+ args:
176
+ num_few_shot: 0
177
+ metrics:
178
+ - type: acc_norm
179
+ value: 12.02
180
+ name: acc_norm
181
+ source:
182
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=vicgalle/Configurable-Yi-1.5-9B-Chat
183
+ name: Open LLM Leaderboard
184
+ - task:
185
+ type: text-generation
186
+ name: Text Generation
187
+ dataset:
188
+ name: MMLU-PRO (5-shot)
189
+ type: TIGER-Lab/MMLU-Pro
190
+ config: main
191
+ split: test
192
+ args:
193
+ num_few_shot: 5
194
+ metrics:
195
+ - type: acc
196
+ value: 33.5
197
+ name: accuracy
198
+ source:
199
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=vicgalle/Configurable-Yi-1.5-9B-Chat
200
  name: Open LLM Leaderboard
 
 
201
  ---
202
 
203
 
 
303
  archivePrefix={arXiv},
304
  primaryClass={cs.CL}
305
  }
306
+ ```
307
+ # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
308
+ Detailed results are available in the [per-model evaluation details dataset](https://huggingface.co/datasets/open-llm-leaderboard/details_vicgalle__Configurable-Yi-1.5-9B-Chat).
309
+
310
+ | Metric |Value|
311
+ |-------------------|----:|
312
+ |Avg. |23.77|
313
+ |IFEval (0-Shot) |43.23|
314
+ |BBH (3-Shot) |35.33|
315
+ |MATH Lvl 5 (4-Shot)| 6.12|
316
+ |GPQA (0-shot) |12.42|
317
+ |MuSR (0-shot) |12.02|
318
+ |MMLU-PRO (5-shot) |33.50|
319
+