Jae-Won Chung committed
Commit b559f9a
1 Parent(s): 2ca1e12

Replace slashes with double dashes

Files changed (1)
  1. pegasus/nlp-eval.yaml  +8 -6

pegasus/nlp-eval.yaml CHANGED
@@ -1,5 +1,5 @@
 - command:
-  - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-causal-experimental --model_args pretrained={{model}},trust_remote_code=True,use_accelerate=True --tasks arc_challenge --num_fewshot 25
+  - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-causal-experimental --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks arc_challenge --num_fewshot 25 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}.json
   model:
   - /data/leaderboard/weights/metaai/llama-7B
   - /data/leaderboard/weights/metaai/llama-13B
@@ -21,7 +21,7 @@
   - togethercomputer/RedPajama-INCITE-7B-Chat
 
 - command:
-  - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-causal-experimental --model_args pretrained={{model}},trust_remote_code=True,use_accelerate=True --tasks hellaswag --num_fewshot 10
+  - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-causal-experimental --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks hellaswag --num_fewshot 10 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}.json
   model:
   - /data/leaderboard/weights/metaai/llama-7B
   - /data/leaderboard/weights/metaai/llama-13B
@@ -43,7 +43,7 @@
   - togethercomputer/RedPajama-INCITE-7B-Chat
 
 - command:
-  - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-causal-experimental --model_args pretrained={{model}},trust_remote_code=True,use_accelerate=True --tasks truthfulqa_mc --num_fewshot 0
+  - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-causal-experimental --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks truthfulqa_mc --num_fewshot 0 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}.json
   model:
   - /data/leaderboard/weights/metaai/llama-7B
   - /data/leaderboard/weights/metaai/llama-13B
@@ -65,6 +65,8 @@
   - togethercomputer/RedPajama-INCITE-7B-Chat
 
 - command:
-  - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-seq2seq --model_args pretrained=lmsys/fastchat-t5-3b-v1.0,trust_remote_code=True,use_accelerate=True --tasks arc_challenge --num_fewshot 25
-  - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-seq2seq --model_args pretrained=lmsys/fastchat-t5-3b-v1.0,trust_remote_code=True,use_accelerate=True --tasks hellaswag --num_fewshot 10
-  - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-seq2seq --model_args pretrained=lmsys/fastchat-t5-3b-v1.0,trust_remote_code=True,use_accelerate=True --tasks truthfulqa_mc --num_fewshot 0
+  - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-seq2seq --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks arc_challenge --num_fewshot 25 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}.json
+  - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-seq2seq --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks hellaswag --num_fewshot 10 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}.json
+  - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-seq2seq --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks truthfulqa_mc --num_fewshot 0 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}.json
+  model:
+  - lmsys/fastchat-t5-3b-v1.0
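
What the commit changes, in short: every evaluation command now writes its results to a per-model JSON file under /data/leaderboard/benchmark/nlp/, and because model identifiers contain slashes ({{ model }} may be a Hugging Face ID or a local weights path), the template flattens them with {{ replace model "/" "--" }} so the result is a valid flat filename. A minimal sketch of that mapping, assuming the pegasus replace helper behaves like Python's str.replace (the template engine's exact semantics are not shown in this commit):

# Hypothetical sketch of the filename mapping behind
#   --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}.json
# Assumption: the `replace` template helper works like Python's str.replace.

def output_path(model: str, base: str = "/data/leaderboard/benchmark/nlp") -> str:
    """Flatten a model identifier into a slash-free JSON result filename."""
    return f"{base}/{model.replace('/', '--')}.json"


if __name__ == "__main__":
    # A Hugging Face-style model ID loses its single slash:
    print(output_path("togethercomputer/RedPajama-INCITE-7B-Chat"))
    # -> /data/leaderboard/benchmark/nlp/togethercomputer--RedPajama-INCITE-7B-Chat.json

    # A local weights path is flattened slash-for-slash, leading slash included:
    print(output_path("/data/leaderboard/weights/metaai/llama-7B"))
    # -> /data/leaderboard/benchmark/nlp/--data--leaderboard--weights--metaai--llama-7B.json

Under that assumption, slashes never reach the output filename, so every model's results land in one flat directory instead of requiring nested subdirectories to exist.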