Jae-Won Chung committed on
Commit
b08a0ac
1 Parent(s): 46f6b9d

Add RWKV to nlp-eval.yaml

Browse files
Files changed (1) hide show
  1. pegasus/nlp-eval.yaml +3 -0
pegasus/nlp-eval.yaml CHANGED
@@ -19,6 +19,7 @@
19
  - project-baize/baize-v2-7B
20
  - StabilityAI/stablelm-tuned-alpha-7b
21
  - togethercomputer/RedPajama-INCITE-7B-Chat
 
22
 
23
  - command:
24
  - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-causal-experimental --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks hellaswag --num_fewshot 10 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/hellaswag.json
@@ -41,6 +42,7 @@
41
  - project-baize/baize-v2-7B
42
  - StabilityAI/stablelm-tuned-alpha-7b
43
  - togethercomputer/RedPajama-INCITE-7B-Chat
 
44
 
45
  - command:
46
  - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-causal-experimental --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks truthfulqa_mc --num_fewshot 0 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/truthfulqa_mc.json
@@ -63,6 +65,7 @@
63
  - project-baize/baize-v2-7B
64
  - StabilityAI/stablelm-tuned-alpha-7b
65
  - togethercomputer/RedPajama-INCITE-7B-Chat
 
66
 
67
  - command:
68
  - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-seq2seq --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks arc_challenge --num_fewshot 25 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/arc_challenge.json
 
19
  - project-baize/baize-v2-7B
20
  - StabilityAI/stablelm-tuned-alpha-7b
21
  - togethercomputer/RedPajama-INCITE-7B-Chat
22
+ - RWKV/rwkv-raven-7b
23
 
24
  - command:
25
  - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-causal-experimental --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks hellaswag --num_fewshot 10 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/hellaswag.json
 
42
  - project-baize/baize-v2-7B
43
  - StabilityAI/stablelm-tuned-alpha-7b
44
  - togethercomputer/RedPajama-INCITE-7B-Chat
45
+ - RWKV/rwkv-raven-7b
46
 
47
  - command:
48
  - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-causal-experimental --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks truthfulqa_mc --num_fewshot 0 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/truthfulqa_mc.json
 
65
  - project-baize/baize-v2-7B
66
  - StabilityAI/stablelm-tuned-alpha-7b
67
  - togethercomputer/RedPajama-INCITE-7B-Chat
68
+ - RWKV/rwkv-raven-7b
69
 
70
  - command:
71
  - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-seq2seq --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks arc_challenge --num_fewshot 25 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/arc_challenge.json