# This YAML dictionary will expand into 21 (models) x 4 (tasks) x 5 (batch sizes) = 420 job
# commands, where {{ model }}, {{ task }}, and {{ batch_size }} are filled in with all
# possible combinations.
# {{ gpu }} is defined in `hosts.yaml`, and will be filled in when Pegasus
# determines the specific node and gpu the generated job command will run on.
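#
# For illustration, one expanded job command would look like the line below.
# The GPU index 0 (and hence the container name `leaderboard0`) is only an
# example; the actual index is taken from `hosts.yaml` when the job is scheduled.
#
#   docker exec leaderboard0 python scripts/benchmark.py \
#     --input-file sharegpt/sg_90k_part1_html_cleaned_lang_first_sampled_sorted.json \
#     --model-path /data/leaderboard/weights/metaai/llama-7B --task chat --batch-size 1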
- command:
    - docker exec leaderboard{{ gpu }} python scripts/benchmark.py --input-file sharegpt/sg_90k_part1_html_cleaned_lang_first_sampled_sorted.json --model-path {{ model }} --task {{ task }} --batch-size {{ batch_size }}
  model:
    - /data/leaderboard/weights/metaai/llama-7B
    - /data/leaderboard/weights/metaai/llama-13B
    - /data/leaderboard/weights/lmsys/vicuna-7B
    - /data/leaderboard/weights/lmsys/vicuna-13B
    - /data/leaderboard/weights/tatsu-lab/alpaca-7B
    - /data/leaderboard/weights/BAIR/koala-7b
    - /data/leaderboard/weights/BAIR/koala-13b
    - /data/leaderboard/weights/BlinkDL/RWKV-4-Raven-7B-v12-Eng98%-Other2%-20230521-ctx8192.pth
    - camel-ai/CAMEL-13B-Combined-Data
    - databricks/dolly-v2-12b
    - FreedomIntelligence/phoenix-inst-chat-7b
    - h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2
    - lmsys/fastchat-t5-3b-v1.0
    - Neutralzz/BiLLa-7B-SFT
    - nomic-ai/gpt4all-13b-snoozy
    - openaccess-ai-collective/manticore-13b-chat-pyg
    - OpenAssistant/oasst-sft-1-pythia-12b
    - project-baize/baize-v2-7B
    - StabilityAI/stablelm-tuned-alpha-7b
    - togethercomputer/RedPajama-INCITE-7B-Chat
    - Salesforce/xgen-7b-8k-inst
  task:
    - chat
    - chat-concise
    - instruct
    - instruct-concise
  batch_size:
    - 1
    - 2
    - 4
    - 8
    - 16