Jae-Won Chung committed on
Commit
b19efcb
1 Parent(s): 8a69511

Add Salesforce/xgen-7b-8k-inst to model list

Browse files
Files changed (2) hide show
  1. pegasus/benchmark.yaml +1 -0
  2. pegasus/nlp-eval.yaml +3 -0
pegasus/benchmark.yaml CHANGED
@@ -25,6 +25,7 @@
25
  - project-baize/baize-v2-7B
26
  - StabilityAI/stablelm-tuned-alpha-7b
27
  - togethercomputer/RedPajama-INCITE-7B-Chat
 
28
  task:
29
  - chat
30
  - chat-concise
 
25
  - project-baize/baize-v2-7B
26
  - StabilityAI/stablelm-tuned-alpha-7b
27
  - togethercomputer/RedPajama-INCITE-7B-Chat
28
+ - Salesforce/xgen-7b-8k-inst
29
  task:
30
  - chat
31
  - chat-concise
pegasus/nlp-eval.yaml CHANGED
@@ -20,6 +20,7 @@
20
  - StabilityAI/stablelm-tuned-alpha-7b
21
  - togethercomputer/RedPajama-INCITE-7B-Chat
22
  - RWKV/rwkv-raven-7b
 
23
 
24
  - command:
25
  - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-causal-experimental --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks hellaswag --num_fewshot 10 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/hellaswag.json
@@ -43,6 +44,7 @@
43
  - StabilityAI/stablelm-tuned-alpha-7b
44
  - togethercomputer/RedPajama-INCITE-7B-Chat
45
  - RWKV/rwkv-raven-7b
 
46
 
47
  - command:
48
  - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-causal-experimental --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks truthfulqa_mc --num_fewshot 0 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/truthfulqa_mc.json
@@ -66,6 +68,7 @@
66
  - StabilityAI/stablelm-tuned-alpha-7b
67
  - togethercomputer/RedPajama-INCITE-7B-Chat
68
  - RWKV/rwkv-raven-7b
 
69
 
70
  - command:
71
  - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-seq2seq --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks arc_challenge --num_fewshot 25 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/arc_challenge.json
 
20
  - StabilityAI/stablelm-tuned-alpha-7b
21
  - togethercomputer/RedPajama-INCITE-7B-Chat
22
  - RWKV/rwkv-raven-7b
23
+ - Salesforce/xgen-7b-8k-inst
24
 
25
  - command:
26
  - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-causal-experimental --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks hellaswag --num_fewshot 10 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/hellaswag.json
 
44
  - StabilityAI/stablelm-tuned-alpha-7b
45
  - togethercomputer/RedPajama-INCITE-7B-Chat
46
  - RWKV/rwkv-raven-7b
47
+ - Salesforce/xgen-7b-8k-inst
48
 
49
  - command:
50
  - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-causal-experimental --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks truthfulqa_mc --num_fewshot 0 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/truthfulqa_mc.json
 
68
  - StabilityAI/stablelm-tuned-alpha-7b
69
  - togethercomputer/RedPajama-INCITE-7B-Chat
70
  - RWKV/rwkv-raven-7b
71
+ - Salesforce/xgen-7b-8k-inst
72
 
73
  - command:
74
  - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-seq2seq --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks arc_challenge --num_fewshot 25 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/arc_challenge.json