Update README.md
Browse files
README.md
CHANGED
@@ -159,23 +159,70 @@ evalplus.evaluate \
|
|
159 |
|
160 |
### Accuracy
|
161 |
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
|
180 |
|
181 |
|
|
|
159 |
|
160 |
### Accuracy
|
161 |
|
162 |
+
<table>
|
163 |
+
<thead>
|
164 |
+
<tr>
|
165 |
+
<th>Category</th>
|
166 |
+
<th>Metric</th>
|
167 |
+
<th>ibm-granite/granite-3.1-8b-base</th>
|
168 |
+
<th>neuralmagic/granite-3.1-8b-base-FP8-dynamic</th>
|
169 |
+
<th>Recovery (%)</th>
|
170 |
+
</tr>
|
171 |
+
</thead>
|
172 |
+
<tbody>
|
173 |
+
<tr>
|
174 |
+
<td rowspan="7"><b>OpenLLM Leaderboard V1</b></td>
|
175 |
+
<td>ARC-Challenge (Acc-Norm, 25-shot)</td>
|
176 |
+
<td>64.68</td>
|
177 |
+
<td>64.16</td>
|
178 |
+
<td>99.20</td>
|
179 |
+
</tr>
|
180 |
+
<tr>
|
181 |
+
<td>GSM8K (Strict-Match, 5-shot)</td>
|
182 |
+
<td>60.88</td>
|
183 |
+
<td>58.45</td>
|
184 |
+
<td>95.99</td>
|
185 |
+
</tr>
|
186 |
+
<tr>
|
187 |
+
<td>HellaSwag (Acc-Norm, 10-shot)</td>
|
188 |
+
<td>83.52</td>
|
189 |
+
<td>83.46</td>
|
190 |
+
<td>99.93</td>
|
191 |
+
</tr>
|
192 |
+
<tr>
|
193 |
+
<td>MMLU (Acc, 5-shot)</td>
|
194 |
+
<td>63.33</td>
|
195 |
+
<td>63.35</td>
|
196 |
+
<td>100.03</td>
|
197 |
+
</tr>
|
198 |
+
<tr>
|
199 |
+
<td>TruthfulQA (MC2, 0-shot)</td>
|
200 |
+
<td>51.33</td>
|
201 |
+
<td>51.56</td>
|
202 |
+
<td>100.45</td>
|
203 |
+
</tr>
|
204 |
+
<tr>
|
205 |
+
<td>Winogrande (Acc, 5-shot)</td>
|
206 |
+
<td>80.90</td>
|
207 |
+
<td>80.66</td>
|
208 |
+
<td>99.70</td>
|
209 |
+
</tr>
|
210 |
+
<tr>
|
211 |
+
<td><b>Average Score</b></td>
|
212 |
+
<td><b>67.44</b></td>
|
213 |
+
<td><b>66.94</b></td>
|
214 |
+
<td><b>99.26</b></td>
|
215 |
+
</tr>
|
216 |
+
<tr>
|
217 |
+
<td><b>HumanEval</b></td>
|
218 |
+
<td>HumanEval Pass@1</td>
|
219 |
+
<td>44.10</td>
|
220 |
+
<td>44.80</td>
|
221 |
+
<td><b>101.59</b></td>
|
222 |
+
</tr>
|
223 |
+
</tbody>
|
224 |
+
</table>
|
225 |
+
|
226 |
|
227 |
|
228 |
|