File size: 5,318 Bytes
fea7ea6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
Organization,Model,Size,Arena Elo,MT-Bench,Arena-Hard,EQ-Bench,MAGI-Hard,ARC-c,HellaSwag,MMLU,TruthfulQA,WinoGrande,GSM-8K
Anthropic,Claude-1,,1150,7.9,,76.83,,,,77,,,
Anthropic,Claude-2.0,,1132,8.06,24,72.89,,,,78.5,,,
Anthropic,Claude-2.1,,1119,8.18,22.8,73.96,,,,,,,
Anthropic,Claude-3-Haiku-20240307,,1181,,41.5,63.65,47.71,89.2,85.9,75.2,,74.2,
Anthropic,Claude-3-Opus-20240229,,1251,9.43,60.4,82.19,76.55,96.4,95.4,86.8,,88.5,
Anthropic,Claude-3-Sonnet-20240229,,1202,9.18,46.8,80.45,61.01,93.2,89,79,,75.1,
Cohere,Command-R-Plus,104B,1192,,33.1,76.11,49.7,70.39,87.96,74.02,56.95,83.82,47.31
Cohere,Command-R-v01,35B,1148,,17,56.05,43.27,65.53,87,68.2,52.32,81.53,56.63
Databricks,DBRX-Instruct,132B,1102,8.26,23.9,76.82,57.13,68.9,89,73.7,66.9,81.8,66.9
DeepSeekAI,DeepSeek-LLM-67B-Chat,67B,1079,,,,,67.75,86.82,72.42,55.85,84.21,63.68
DeepSeekAI,DeepSeek-LLM-7B-Chat,7B,,,,,,55.8,79.38,51.75,47.98,74.82,46.55
Google,Gemini-1.0-Pro,,1136,,17.8,,,,,71.8,,,
Google,Gemini-1.5-Pro-API-0409-Preview,,1248,,,,,,,81.9,,,
Google,Gemma-1.1-7B-it,7B,1085,,,59.17,38.43,,,,,,
Google,Gemma-2B-it,2B,,,3,23.26,24.16,43.94,62.7,37.65,45.82,60.93,5.46
Google,Gemma-7B-it,7B,1043,,7.5,61.72,24.85,53.2,81.2,64.3,31.81,72.3,46.4
OpenAI,GPT-3.5-Turbo-0125,,1106,,23.3,64.97,42.65,,,,,,
OpenAI,GPT-3.5-Turbo-0301,,1108,7.94,18.1,70.67,46.66,85.2,85.5,70,47,81.6,57.1
OpenAI,GPT-3.5-Turbo-0613,,1120,8.39,24.8,69.35,40.55,,,,,,
OpenAI,GPT-3.5-Turbo-1106,,1072,8.32,18.9,71.74,43.17,,,,,,
OpenAI,GPT-4-0125-Preview,,1247,,78,83.87,76.83,,,,,,
OpenAI,GPT-4-0314,,1189,8.96,50,85.73,75.67,96.3,95.3,86.4,59,87.5,92
OpenAI,GPT-4-0613,,1165,9.18,37.9,84.79,77.85,,,,,,
OpenAI,GPT-4-1106-Preview,,1253,9.32,,86.05,74.96,,,,,,
OpenAI,GPT-4-Turbo-2024-04-09,,1257,,82.6,86.35,77.74,,,,,,
InternLM,InternLM2-Chat-20B,20B,,7.9,,,,,,66.5,,,
InternLM,InternLM2-Chat-7B,7B,,7.7,,62.61,38.43,,,63.7,,,
Meta,Llama-2-13b-chat-hf,13B,1054,6.65,,49.12,28.2,59.04,81.94,54.64,44.12,74.51,15.24
Meta,Llama-2-70b-chat-hf,70B,1088,6.86,11.6,73.59,35.4,64.59,85.88,63.91,52.8,80.51,26.69
Meta,Llama-2-7b-chat-hf,7B,1040,6.27,4.6,36.32,27.5,52.9,78.55,48.32,45.57,71.74,7.35
Meta,Llama-3-70b-instruct,70B,1207,,41.1,82.13,67.97,71.42,85.69,80.06,61.81,82.87,85.44
Meta,Llama-3-8b-instruct,8B,1146,,20.6,68.88,63.84,60.75,78.55,67.07,51.65,74.51,68.69
Mistral,Mistral-7B-Instruct-v0.1,7B,1011,6.84,,52.15,30.69,54.52,75.63,55.38,56.28,73.72,14.25
Mistral,Mistral-7B-Instruct-v0.2,7B,1073,7.6,12.6,68.18,34.69,63.14,84.88,60.78,68.26,77.19,40.03
Mistral,Mistral-large-2402,,1158,8.66,37.7,85.17,67.69,94,89.2,81.2,50.5,86.7,81
Mistral,Mistral-medium,,1148,8.61,31.9,82.57,62.15,89.9,88,75.3,,88,66.7
Mistral,Mixtral-8x22B-Instruct-v0.1,141B,1147,,36.4,78.79,62.41,72.7,89.08,77.77,68.14,85.16,82.03
Mistral,Mixtral-8x7b-Instruct-v0.1,47B,1114,8.3,23.4,72.37,45.74,70.22,87.63,71.16,64.58,81.37,60.73
OpenChat,OpenChat-3.5-0106,7B,1098,7.8,,,,66.04,82.93,65.04,51.9,81.77,68.16
OrionStarAI,Orion-14B-Chat,14B,,7.37,,59.71,40.74,,,61.7,,,
Microsoft,Phi-3-Mini-128k-Instruct,3.8B,1064,,,,,63.14,80.09,68.7,54.12,72.85,69.52
Microsoft,Phi-3-Mini-4k-Instruct,3.8B,,,,58.15,53.26,62.97,80.6,69.08,59.88,72.38,74.53
Alibaba,Qwen-14B-Chat,14B,1038,6.96,,63.47,39.74,,,66.5,,,
Alibaba,Qwen-7B-Chat,7B,,,,50.11,33.44,,,57,,,
Alibaba,Qwen1.5-1.8B-Chat,1.8B,,,,24.12,31.56,38.74,60.02,45.87,40.62,59.67,19.03
Alibaba,Qwen1.5-14B-Chat,14B,1119,7.91,,74.99,49.27,58.79,82.33,68.52,60.38,73.32,30.86
Alibaba,Qwen1.5-32B-Chat,32B,1135,8.3,,75.59,60.72,66.04,85.49,74.99,66.95,77.19,7.05
Alibaba,Qwen1.5-4B-Chat,4B,,,,28.75,32.66,43.26,69.73,55.55,44.79,64.96,2.43
Alibaba,Qwen1.5-72B-Chat,72B,1153,8.61,36.1,82.81,63.47,68.52,86.42,77.44,63.9,79.08,20.39
Alibaba,Qwen1.5-7B-Chat,7B,1073,7.6,,54.41,41.59,55.89,78.56,61.7,53.65,67.8,13.19
RekaAI,Reka-Edge,7B,,7.6,,,,,,65.7,,,
RekaAI,Reka-Flash,21B,1149,8.2,,,,,,73.5,,,
RekaAI,Reka-Core,,,,,,,,,83.2,,,
Upstage,SOLAR-10.7B-Instruct-v1.0,10.7B,1065,7.58,,73.53,39.62,71.08,88.16,66.21,71.43,83.58,64.75
Nexusflow,Starling-LM-7B-alpha,7B,1091,8.09,12.8,73.9,37.06,63.82,84.9,64.67,46.39,80.58,62.4
Nexusflow,Starling-LM-7B-beta,7B,1119,8.12,23,73.82,40.12,67.24,83.47,65.14,55.47,81.29,66.64
AllenAI,Tulu-2-DPO-70B,70B,1102,7.89,15,76.63,50.23,72.1,88.99,69.84,65.78,83.27,62.62
LMSys,Vicuna-13B-v1.5,13B,1047,6.57,,67.39,28.75,57.08,81.24,56.67,51.51,74.66,11.3
LMSys,Vicuna-33B-v1.3,33B,1093,7.12,8.6,67.07,31.66,,,59.2,,,
LMSys,Vicuna-7B-v1.1,7B,1009,6.17,,26.12,27.38,53.67,77.46,45.63,48.94,70.96,5.53
Microsoft,WizardLM-13b-v1.2,13B,1061,7.2,,63.71,29.1,,,52.7,,,
Microsoft,WizardLM-2-70B,70B,,8.92,,,,,,,,,
Microsoft,WizardLM-2-7B,7B,,8.28,,69.31,35.4,63.23,83.41,61.75,57.01,73.48,43.59
Microsoft,WizardLM-2-8x22B,141B,,9.12,,77.91,59.16,72.44,89.05,76.77,60.5,82.24,84.61
Microsoft,WizardLM-70B-v1.0,70B,1108,7.71,,,,64.52,83.21,63.32,54.6,,
01.AI,Yi-34B-Chat,34B,1110,7.88,23.1,71.62,57.1,65.1,84.08,74.87,55.41,79.79,19.79
01.AI,Yi-6B-Chat,6B,,,,61.79,38.74,,,60.99,,,
HuggingFace,Zephyr-7b-alpha,7B,1042,6.88,,56.82,35.15,61.01,84.04,61.39,57.9,78.61,14.03
HuggingFace,Zephyr-7b-beta,7B,1054,7.34,,58.33,35.97,62.03,84.36,61.07,57.45,77.74,29.04
HuggingFace,Zephyr-ORPO-141b-A35b-v0.1,141B,1125,8.17,,,,,,,,,