alexmarques committed on
Commit
c27accb
1 Parent(s): 71fb5c0

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +4 -3
README.md CHANGED
@@ -169,7 +169,7 @@ This version of the lm-evaluation-harness includes versions of ARC-Challenge and
169
  </td>
170
  </tr>
171
  <tr>
172
- <td>GSM-8K-cot (8-shot, strict-match)
173
  </td>
174
  <td>96.44
175
  </td>
@@ -199,7 +199,7 @@ This version of the lm-evaluation-harness includes versions of ARC-Challenge and
199
  </td>
200
  </tr>
201
  <tr>
202
- <td>TruthfulQA (0-shot)
203
  </td>
204
  <td>64.64
205
  </td>
@@ -253,6 +253,7 @@ lm_eval \
253
  --model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-FP8-dynamic",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=8 \
254
  --tasks gsm8k_cot_llama_3.1_instruct \
255
  --apply_chat_template \
 
256
  --num_fewshot 8 \
257
  --batch_size auto
258
  ```
@@ -282,7 +283,7 @@ lm_eval \
282
  lm_eval \
283
  --model vllm \
284
  --model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-FP8-dynamic",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=8 \
285
- --tasks truthfulqa_mc \
286
  --num_fewshot 0 \
287
  --batch_size auto
288
  ```
 
169
  </td>
170
  </tr>
171
  <tr>
172
+ <td>GSM-8K (CoT, 8-shot, strict-match)
173
  </td>
174
  <td>96.44
175
  </td>
 
199
  </td>
200
  </tr>
201
  <tr>
202
+ <td>TruthfulQA (0-shot, mc2)
203
  </td>
204
  <td>64.64
205
  </td>
 
253
  --model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-FP8-dynamic",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=8 \
254
  --tasks gsm8k_cot_llama_3.1_instruct \
255
  --apply_chat_template \
256
+ --fewshot_as_multiturn \
257
  --num_fewshot 8 \
258
  --batch_size auto
259
  ```
 
283
  lm_eval \
284
  --model vllm \
285
  --model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-FP8-dynamic",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=8 \
286
+ --tasks truthfulqa \
287
  --num_fewshot 0 \
288
  --batch_size auto
289
  ```