Zhiyu Wu committed on
Commit
8595b18
1 Parent(s): fddc51a

Add NLP evaluation metrics (#8)

Browse files

Co-authored-by: Zhiyu Wu <zhiyuwu@ampere01.eecs.umich.edu>

Files changed (1) hide show
  1. data/2023-06-17/score.csv +21 -21
data/2023-06-17/score.csv CHANGED
@@ -1,21 +1,21 @@
1
- model,lmsys_elo
2
- lmsys/vicuna-7B,1007
3
- lmsys/vicuna-13B,1054
4
- tatsu-lab/alpaca-7B,NaN
5
- metaai/llama-7B,NaN
6
- metaai/llama-13B,854
7
- camel-ai/CAMEL-13B-Combined-Data,NaN
8
- BlinkDL/RWKV-4-Raven-7B-v12-Eng98%-Other2%-20230521-ctx8192.pth,NaN
9
- databricks/dolly-v2-12b,866
10
- FreedomIntelligence/phoenix-inst-chat-7b,NaN
11
- h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2,NaN
12
- lmsys/fastchat-t5-3b-v1.0,941
13
- Neutralzz/BiLLa-7B-SFT,NaN
14
- nomic-ai/gpt4all-13b-snoozy,NaN
15
- openaccess-ai-collective/manticore-13b-chat-pyg,NaN
16
- OpenAssistant/oasst-sft-1-pythia-12b,921
17
- project-baize/baize-v2-7B,NaN
18
- BAIR/koala-7b,NaN
19
- BAIR/koala-13b,980
20
- StabilityAI/stablelm-tuned-alpha-7b,882
21
- togethercomputer/RedPajama-INCITE-7B-Chat,NaN
 
1
+ model,average,ARC (25-s),HellaSwag (10-s),TruthfulQA (MC) (0-s)
2
+ lmsys/vicuna-7B,60.0,53.5,77.5,49.0
3
+ lmsys/vicuna-13B,61.6,52.9,80.1,51.8
4
+ tatsu-lab/alpaca-7B,56.4,52.6,76.9,39.6
5
+ metaai/llama-7B,54.3,51.1,77.7,34.1
6
+ metaai/llama-13B,59.0,56.3,80.9,39.9
7
+ camel-ai/CAMEL-13B-Combined-Data,60.7,55.5,79.3,47.3
8
+ BlinkDL/RWKV-4-Raven-7B-v12-Eng98%-Other2%-20230521-ctx8192.pth,NaN,NaN,NaN,NaN
9
+ databricks/dolly-v2-12b,49.1,42.2,71.8,33.4
10
+ FreedomIntelligence/phoenix-inst-chat-7b,51.8,45.0,63.2,47.1
11
+ h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2,45.5,36.9,61.6,37.9
12
+ lmsys/fastchat-t5-3b-v1.0,43.7,35.9,46.4,48.8
13
+ Neutralzz/BiLLa-7B-SFT,34.2,27.7,26.0,49.0
14
+ nomic-ai/gpt4all-13b-snoozy,61.1,56.1,78.7,48.4
15
+ openaccess-ai-collective/manticore-13b-chat-pyg,63.2,58.7,82.0,48.9
16
+ OpenAssistant/oasst-sft-1-pythia-12b,51.6,45.6,69.9,39.2
17
+ project-baize/baize-v2-7B,55.1,48.5,75.0,41.7
18
+ BAIR/koala-7b,55.6,47.1,73.7,46.0
19
+ BAIR/koala-13b,60.2,52.9,77.5,50.1
20
+ StabilityAI/stablelm-tuned-alpha-7b,41.9,31.9,53.6,40.2
21
+ togethercomputer/RedPajama-INCITE-7B-Chat,49.7,42.2,70.8,36.1