Files changed (1)
  1. README.md +106 -0
README.md CHANGED
@@ -106,6 +106,98 @@ model-index:
     source:
       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=davidkim205/Rhea-72b-v0.5
       name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: IFEval (0-Shot)
+      type: HuggingFaceH4/ifeval
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: inst_level_strict_acc and prompt_level_strict_acc
+      value: 1.45
+      name: strict accuracy
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=davidkim205/Rhea-72b-v0.5
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: BBH (3-Shot)
+      type: BBH
+      args:
+        num_few_shot: 3
+    metrics:
+    - type: acc_norm
+      value: 3.67
+      name: normalized accuracy
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=davidkim205/Rhea-72b-v0.5
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MATH Lvl 5 (4-Shot)
+      type: hendrycks/competition_math
+      args:
+        num_few_shot: 4
+    metrics:
+    - type: exact_match
+      value: 5.51
+      name: exact match
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=davidkim205/Rhea-72b-v0.5
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: GPQA (0-shot)
+      type: Idavidrein/gpqa
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: acc_norm
+      value: 0.34
+      name: acc_norm
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=davidkim205/Rhea-72b-v0.5
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MuSR (0-shot)
+      type: TAUR-Lab/MuSR
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: acc_norm
+      value: 11.32
+      name: acc_norm
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=davidkim205/Rhea-72b-v0.5
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MMLU-PRO (5-shot)
+      type: TIGER-Lab/MMLU-Pro
+      config: main
+      split: test
+      args:
+        num_few_shot: 5
+    metrics:
+    - type: acc
+      value: 1.85
+      name: accuracy
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=davidkim205/Rhea-72b-v0.5
+      name: Open LLM Leaderboard
 ---
 # Rhea-72b-v0.5
 
@@ -230,3 +322,17 @@ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-le
 |Winogrande (5-shot) |87.85|
 |GSM8k (5-shot)      |76.12|
 
+
+# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
+Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_davidkim205__Rhea-72b-v0.5)
+
+| Metric             |Value|
+|--------------------|----:|
+|Avg.                | 4.02|
+|IFEval (0-Shot)     | 1.45|
+|BBH (3-Shot)        | 3.67|
+|MATH Lvl 5 (4-Shot) | 5.51|
+|GPQA (0-shot)       | 0.34|
+|MuSR (0-shot)       |11.32|
+|MMLU-PRO (5-shot)   | 1.85|
+
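For reference, the `model-index` entries this diff adds follow the standard Hugging Face model card metadata schema, which `huggingface_hub` can parse back into structured evaluation results. A minimal sketch of reading them programmatically, assuming only that `huggingface_hub` is installed; the repo id is the one from this card, and the rest is the library's documented `ModelCard` API:

```python
# Minimal sketch: load the model card and print the leaderboard results
# that this diff adds to the model-index metadata block.
from huggingface_hub import ModelCard

card = ModelCard.load("davidkim205/Rhea-72b-v0.5")

# eval_results is huggingface_hub's parsed view of the YAML model-index;
# each entry carries the dataset, metric, and value shown in the diff above.
for result in card.data.eval_results or []:
    print(f"{result.dataset_name}: {result.metric_type} = {result.metric_value}")
```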