alexmarques
committed on
Commit
•
c27accb
1
Parent(s):
71fb5c0
Update README.md
Browse files
README.md
CHANGED
@@ -169,7 +169,7 @@ This version of the lm-evaluation-harness includes versions of ARC-Challenge and
|
|
169 |
</td>
|
170 |
</tr>
|
171 |
<tr>
|
172 |
-
<td>GSM-8K
|
173 |
</td>
|
174 |
<td>96.44
|
175 |
</td>
|
@@ -199,7 +199,7 @@ This version of the lm-evaluation-harness includes versions of ARC-Challenge and
|
|
199 |
</td>
|
200 |
</tr>
|
201 |
<tr>
|
202 |
-
<td>TruthfulQA (0-shot)
|
203 |
</td>
|
204 |
<td>64.64
|
205 |
</td>
|
@@ -253,6 +253,7 @@ lm_eval \
|
|
253 |
--model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-FP8-dynamic",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=8 \
|
254 |
--tasks gsm8k_cot_llama_3.1_instruct \
|
255 |
--apply_chat_template \
|
|
|
256 |
--num_fewshot 8 \
|
257 |
--batch_size auto
|
258 |
```
|
@@ -282,7 +283,7 @@ lm_eval \
|
|
282 |
lm_eval \
|
283 |
--model vllm \
|
284 |
--model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-FP8-dynamic",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=8 \
|
285 |
-
--tasks
|
286 |
--num_fewshot 0 \
|
287 |
--batch_size auto
|
288 |
```
|
|
|
169 |
</td>
|
170 |
</tr>
|
171 |
<tr>
|
172 |
+
<td>GSM-8K (CoT, 8-shot, strict-match)
|
173 |
</td>
|
174 |
<td>96.44
|
175 |
</td>
|
|
|
199 |
</td>
|
200 |
</tr>
|
201 |
<tr>
|
202 |
+
<td>TruthfulQA (0-shot, mc2)
|
203 |
</td>
|
204 |
<td>64.64
|
205 |
</td>
|
|
|
253 |
--model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-FP8-dynamic",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=8 \
|
254 |
--tasks gsm8k_cot_llama_3.1_instruct \
|
255 |
--apply_chat_template \
|
256 |
+
--fewshot_as_multiturn \
|
257 |
--num_fewshot 8 \
|
258 |
--batch_size auto
|
259 |
```
|
|
|
283 |
lm_eval \
|
284 |
--model vllm \
|
285 |
--model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-FP8-dynamic",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=8 \
|
286 |
+
--tasks truthfulqa \
|
287 |
--num_fewshot 0 \
|
288 |
--batch_size auto
|
289 |
```
|