Spaces:
Running
Running
feat: update 9 models
#4
by
Jerry0723
- opened
- app.py +1 -1
- changelog.md +16 -1
- data/chinese_benchmark_gen.csv +9 -0
- data/chinese_benchmark_per.csv +9 -0
- data/subclass_gen.csv +10 -1
- data/subclass_per.csv +10 -1
app.py
CHANGED
@@ -46,7 +46,7 @@ _BIBTEX = """
|
|
46 |
}
|
47 |
"""
|
48 |
|
49 |
-
_LAST_UPDATED = "
|
50 |
|
51 |
banner_url = "./assets/logo.png"
|
52 |
_BANNER = f'<div style="display: flex; justify-content: space-around;"><img src="{banner_url}" alt="Banner" style="width: 40vw; min-width: 300px; max-width: 600px;"> </div>' # noqa
|
|
|
46 |
}
|
47 |
"""
|
48 |
|
49 |
+
_LAST_UPDATED = "December 28, 2024"
|
50 |
|
51 |
banner_url = "./assets/logo.png"
|
52 |
_BANNER = f'<div style="display: flex; justify-content: space-around;"><img src="{banner_url}" alt="Banner" style="width: 40vw; min-width: 300px; max-width: 600px;"> </div>' # noqa
|
changelog.md
CHANGED
@@ -26,4 +26,19 @@ version: v1.0.3
|
|
26 |
changed:
|
27 |
- [1]feat: add three Qwen instruct models
|
28 |
- [2]feat: remove Qwen base models
|
29 |
-
- [3]feat: update some models' name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
changed:
|
27 |
- [1]feat: add three Qwen instruct models
|
28 |
- [2]feat: remove Qwen base models
|
29 |
+
- [3]feat: update some models' name
|
30 |
+
|
31 |
+
### 2024-12-28
|
32 |
+
version: v1.0.4
|
33 |
+
|
34 |
+
changed:
|
35 |
+
- [1]feat: update 9 models per the December to-do list:
|
36 |
+
- QwQ-32B-Preview
|
37 |
+
- Llama-3.1-70B-Instruct
|
38 |
+
- Llama-3.3-70B-Instruct
|
39 |
+
- Mistral-Nemo-Instruct-2407
|
40 |
+
- Ministral-8B-Instruct-2410
|
41 |
+
- Phi-3-small-8k-instruct
|
42 |
+
- Phi-3-small-128k-instruct
|
43 |
+
- Phi-3-medium-4k-instruct
|
44 |
+
- Phi-3-medium-128k-instruct
|
data/chinese_benchmark_gen.csv
CHANGED
@@ -5,19 +5,28 @@ Qwen2.5-72B-Instruct >65B 63.41/0.77 66.00/0.95 56.00/0.62 61.49/0.65 70.90/0.96
|
|
5 |
Qwen1.5-72B-Chat >65B 62.91/0.50 73.86/0.84 40.46/0.97 58.75/0.35 85.55/0.62
|
6 |
Qwen2-72B-Instruct >65B 54.08/0.20 58.10/0.60 30.72/0.45 52.63/0.05 77.65/0.36
|
7 |
Opt-66B >65B 54.46/0.17 53.22/0.06 76.94/0.24 57.73/0.49 31.77/0.28
|
|
|
|
|
8 |
Qwen2.5-32B-Instruct ~30B 69.64/0.39 92.13/0.45 43.24/0.83 62.70/0.25 96.27/0.20
|
|
|
9 |
Yi-1.5-34B-Chat ~30B 60.06/0.43 58.14/0.40 72.51/0.55 63.27/0.56 47.56/0.42
|
10 |
Opt-30B ~30B 50.88/0.11 50.76/0.12 72.95/0.16 51.18/0.26 28.62/0.28
|
11 |
InternLM2-Chat-20B 10B~20B 70.21/0.55 73.30/0.70 63.79/0.43 67.82/0.45 76.65/0.67
|
12 |
Qwen1.5-14B-Chat 10B~20B 68.25/0.44 65.87/0.37 76.02/0.72 71.51/0.59 60.44/0.20
|
|
|
13 |
Baichuan2-13B-Chat 10B~20B 62.86/0.31 64.17/0.33 58.61/0.80 61.75/0.30 67.13/0.56
|
|
|
|
|
14 |
Ziya2-13B-Chat 10B~20B 53.40/0.43 53.33/0.38 56.18/0.41 53.48/0.53 50.62/0.61
|
15 |
Opt-13B 10B~20B 50.18/0.26 50.29/0.20 69.97/0.37 49.94/0.47 30.22/0.31
|
|
|
16 |
Gemma-1.1-7B-it 5B~10B 71.70/0.26 68.66/0.37 80.11/0.05 76.00/0.09 63.26/0.47
|
17 |
DeepSeek-LLM-7B-Chat 5B~10B 71.63/0.17 69.50/0.15 77.33/0.67 74.33/0.41 65.90/0.38
|
18 |
GLM-4-9B-Chat 5B~10B 70.96/0.23 82.15/0.55 53.73/0.48 65.50/0.18 88.27/0.41
|
19 |
Mistral-7B-Instruct-v0.3 5B~10B 70.41/0.41 68.55/0.52 75.67/0.22 72.71/0.26 65.12/0.58
|
20 |
Qwen1.5-7B-Chat 5B~10B 70.36/0.39 64.66/0.27 90.09/0.57 83.55/0.82 50.53/0.18
|
|
|
|
|
21 |
Yi-1.5-9B-Chat 5B~10B 62.12/0.38 64.42/0.42 54.53/0.43 60.43/0.36 69.75/0.37
|
22 |
Llama3-ChatQA-1.5-8B 5B~10B 61.28/0.40 57.63/0.20 85.84/0.43 72.02/0.95 36.61/0.54
|
23 |
Baichuan2-7B-Chat 5B~10B 59.43/0.24 72.06/0.66 31.11/0.40 55.95/0.12 87.89/0.20
|
|
|
5 |
Qwen1.5-72B-Chat >65B 62.91/0.50 73.86/0.84 40.46/0.97 58.75/0.35 85.55/0.62
|
6 |
Qwen2-72B-Instruct >65B 54.08/0.20 58.10/0.60 30.72/0.45 52.63/0.05 77.65/0.36
|
7 |
Opt-66B >65B 54.46/0.17 53.22/0.06 76.94/0.24 57.73/0.49 31.77/0.28
|
8 |
+
Llama-3.1-70B-Instruct >65B 52.84/0.38 59.07/1.22 19.82/0.85 51.57/0.24 86.14/0.58
|
9 |
+
Llama-3.3-70B-Instruct >65B 50.87/0.07 54.51/0.86 13.19/0.10 50.37/0.06 88.89/0.39
|
10 |
Qwen2.5-32B-Instruct ~30B 69.64/0.39 92.13/0.45 43.24/0.83 62.70/0.25 96.27/0.20
|
11 |
+
QwQ-32B-Preview ~30B 69.55/0.28 75.97/0.48 57.60/0.27 65.61/0.17 81.62/0.33
|
12 |
Yi-1.5-34B-Chat ~30B 60.06/0.43 58.14/0.40 72.51/0.55 63.27/0.56 47.56/0.42
|
13 |
Opt-30B ~30B 50.88/0.11 50.76/0.12 72.95/0.16 51.18/0.26 28.62/0.28
|
14 |
InternLM2-Chat-20B 10B~20B 70.21/0.55 73.30/0.70 63.79/0.43 67.82/0.45 76.65/0.67
|
15 |
Qwen1.5-14B-Chat 10B~20B 68.25/0.44 65.87/0.37 76.02/0.72 71.51/0.59 60.44/0.20
|
16 |
+
Phi-3-medium-128k-instruct 10B~20B 64.30/0.06 63.89/0.13 66.53/0.52 64.76/0.26 62.05/0.42
|
17 |
Baichuan2-13B-Chat 10B~20B 62.86/0.31 64.17/0.33 58.61/0.80 61.75/0.30 67.13/0.56
|
18 |
+
Mistral-Nemo-Instruct-2407 10B~20B 59.71/0.45 61.79/0.52 51.82/0.48 58.20/0.44 67.68/0.44
|
19 |
+
Phi-3-medium-4k-instruct 10B~20B 57.79/0.45 58.69/0.37 53.88/0.62 57.02/0.55 61.74/0.55
|
20 |
Ziya2-13B-Chat 10B~20B 53.40/0.43 53.33/0.38 56.18/0.41 53.48/0.53 50.62/0.61
|
21 |
Opt-13B 10B~20B 50.18/0.26 50.29/0.20 69.97/0.37 49.94/0.47 30.22/0.31
|
22 |
+
Phi-3-small-8k-instruct 5B~10B 72.73/0.47 73.67/0.63 71.12/0.49 71.85/0.35 74.36/0.59
|
23 |
Gemma-1.1-7B-it 5B~10B 71.70/0.26 68.66/0.37 80.11/0.05 76.00/0.09 63.26/0.47
|
24 |
DeepSeek-LLM-7B-Chat 5B~10B 71.63/0.17 69.50/0.15 77.33/0.67 74.33/0.41 65.90/0.38
|
25 |
GLM-4-9B-Chat 5B~10B 70.96/0.23 82.15/0.55 53.73/0.48 65.50/0.18 88.27/0.41
|
26 |
Mistral-7B-Instruct-v0.3 5B~10B 70.41/0.41 68.55/0.52 75.67/0.22 72.71/0.26 65.12/0.58
|
27 |
Qwen1.5-7B-Chat 5B~10B 70.36/0.39 64.66/0.27 90.09/0.57 83.55/0.82 50.53/0.18
|
28 |
+
Phi-3-small-128k-instruct 5B~10B 67.43/0.26 72.10/0.54 57.35/0.17 64.33/0.09 77.61/0.43
|
29 |
+
Ministral-8B-Instruct-2410 5B~10B 62.32/0.01 62.71/0.19 61.60/0.29 61.94/0.19 63.05/0.28
|
30 |
Yi-1.5-9B-Chat 5B~10B 62.12/0.38 64.42/0.42 54.53/0.43 60.43/0.36 69.75/0.37
|
31 |
Llama3-ChatQA-1.5-8B 5B~10B 61.28/0.40 57.63/0.20 85.84/0.43 72.02/0.95 36.61/0.54
|
32 |
Baichuan2-7B-Chat 5B~10B 59.43/0.24 72.06/0.66 31.11/0.40 55.95/0.12 87.89/0.20
|
data/chinese_benchmark_per.csv
CHANGED
@@ -4,19 +4,28 @@ Qwen1.5-72B-Chat >65B 63.67/0.46 58.27/0.32 96.84/0.13 90.51/0.57 30.34/0.80
|
|
4 |
Qwen2.5-72B-Instruct >65B 63.27/0.52 66.00/0.60 55.09/0.82 61.31/0.46 71.49/0.25
|
5 |
Qwen2-72B-Instruct >65B 60.70/0.49 57.90/0.42 79.03/0.63 66.75/0.77 42.28/0.43
|
6 |
Opt-66B >65B 59.93/0.41 56.52/0.37 86.87/0.59 71.36/0.78 32.86/0.74
|
|
|
7 |
Llama3-ChatQA-1.5-70B >65B 40.41/0.29 33.86/0.75 19.84/0.75 43.13/0.25 61.08/0.37
|
|
|
8 |
Yi-1.5-34B-Chat ~30B 66.02/0.22 80.13/0.55 42.82/0.25 60.86/0.16 89.33/0.41
|
9 |
Qwen2.5-32B-Instruct ~30B 64.33/0.46 62.46/0.44 72.24/0.71 66.91/0.53 56.38/0.18
|
10 |
Opt-30B ~30B 53.82/0.03 54.42/0.21 48.32/0.20 53.34/0.11 59.34/0.27
|
|
|
|
|
11 |
Baichuan2-13B-Chat 10B~20B 70.43/0.39 65.81/0.38 85.34/0.63 79.02/0.63 55.46/0.47
|
|
|
|
|
12 |
Qwen1.5-14B-Chat 10B~20B 61.29/0.40 57.02/0.32 92.43/0.55 79.80/1.05 30.02/0.47
|
13 |
Ziya2-13B-Chat 10B~20B 55.25/0.26 59.24/0.37 34.30/0.11 53.61/0.26 76.29/0.39
|
14 |
InternLM2-Chat-20B 10B~20B 53.67/0.16 79.00/0.66 10.30/0.60 51.90/0.11 97.25/0.26
|
15 |
Opt-13B 10B~20B 49.31/0.31 37.77/3.57 1.76/0.16 49.59/0.23 97.08/0.29
|
16 |
Gemma-1.1-7B-it 5B~10B 64.32/0.68 59.98/0.58 86.60/0.35 75.70/0.80 41.95/0.93
|
17 |
Qwen1.5-7B-Chat 5B~10B 62.48/0.54 59.06/0.48 81.92/0.50 70.28/0.65 42.96/0.81
|
|
|
18 |
Yi-1.5-9B-Chat 5B~10B 60.35/0.52 79.47/1.37 28.16/0.33 56.22/0.39 92.69/0.59
|
|
|
19 |
DeepSeek-LLM-7B-Chat 5B~10B 56.79/0.19 84.83/1.23 16.77/0.09 53.70/0.15 96.99/0.27
|
|
|
20 |
GPT-J-6B 5B~10B 55.98/0.42 80.27/1.42 16.11/0.86 53.26/0.23 96.03/0.20
|
21 |
Baichuan2-7B-Chat 5B~10B 53.99/0.51 62.89/1.57 19.96/0.88 52.31/0.30 88.18/0.23
|
22 |
GLM-4-9B-Chat 5B~10B 50.03/0.15 50.07/0.13 99.31/0.22 44.12/9.01 0.52/0.04
|
|
|
4 |
Qwen2.5-72B-Instruct >65B 63.27/0.52 66.00/0.60 55.09/0.82 61.31/0.46 71.49/0.25
|
5 |
Qwen2-72B-Instruct >65B 60.70/0.49 57.90/0.42 79.03/0.63 66.75/0.77 42.28/0.43
|
6 |
Opt-66B >65B 59.93/0.41 56.52/0.37 86.87/0.59 71.36/0.78 32.86/0.74
|
7 |
+
Llama-3.1-70B-Instruct >65B 43.68/0.41 36.45/0.84 16.66/0.34 45.83/0.30 70.82/0.48
|
8 |
Llama3-ChatQA-1.5-70B >65B 40.41/0.29 33.86/0.75 19.84/0.75 43.13/0.25 61.08/0.37
|
9 |
+
Llama-3.3-70B-Instruct >65B 36.84/0.82 32.02/1.29 23.19/1.13 39.58/0.63 50.55/0.69
|
10 |
Yi-1.5-34B-Chat ~30B 66.02/0.22 80.13/0.55 42.82/0.25 60.86/0.16 89.33/0.41
|
11 |
Qwen2.5-32B-Instruct ~30B 64.33/0.46 62.46/0.44 72.24/0.71 66.91/0.53 56.38/0.18
|
12 |
Opt-30B ~30B 53.82/0.03 54.42/0.21 48.32/0.20 53.34/0.11 59.34/0.27
|
13 |
+
QwQ-32B-Preview ~30B 51.82/0.06 51.04/0.10 94.83/0.28 62.38/0.26 8.61/0.39
|
14 |
+
Phi-3-medium-4k-instruct 10B~20B 71.04/0.31 69.74/0.29 74.56/0.97 72.54/0.59 67.49/0.89
|
15 |
Baichuan2-13B-Chat 10B~20B 70.43/0.39 65.81/0.38 85.34/0.63 79.02/0.63 55.46/0.47
|
16 |
+
Phi-3-medium-128k-instruct 10B~20B 68.87/0.81 68.08/0.51 71.32/1.44 69.75/1.17 66.41/0.57
|
17 |
+
Mistral-Nemo-Instruct-2407 10B~20B 66.88/0.46 62.56/0.28 84.42/0.90 75.89/1.13 49.26/0.24
|
18 |
Qwen1.5-14B-Chat 10B~20B 61.29/0.40 57.02/0.32 92.43/0.55 79.80/1.05 30.02/0.47
|
19 |
Ziya2-13B-Chat 10B~20B 55.25/0.26 59.24/0.37 34.30/0.11 53.61/0.26 76.29/0.39
|
20 |
InternLM2-Chat-20B 10B~20B 53.67/0.16 79.00/0.66 10.30/0.60 51.90/0.11 97.25/0.26
|
21 |
Opt-13B 10B~20B 49.31/0.31 37.77/3.57 1.76/0.16 49.59/0.23 97.08/0.29
|
22 |
Gemma-1.1-7B-it 5B~10B 64.32/0.68 59.98/0.58 86.60/0.35 75.70/0.80 41.95/0.93
|
23 |
Qwen1.5-7B-Chat 5B~10B 62.48/0.54 59.06/0.48 81.92/0.50 70.28/0.65 42.96/0.81
|
24 |
+
Phi-3-small-128k-instruct 5B~10B 61.76/0.27 60.47/0.16 68.45/0.61 63.46/0.50 55.05/0.61
|
25 |
Yi-1.5-9B-Chat 5B~10B 60.35/0.52 79.47/1.37 28.16/0.33 56.22/0.39 92.69/0.59
|
26 |
+
Phi-3-small-8k-instruct 5B~10B 59.47/0.39 56.25/0.30 86.06/0.40 70.05/0.85 32.75/0.49
|
27 |
DeepSeek-LLM-7B-Chat 5B~10B 56.79/0.19 84.83/1.23 16.77/0.09 53.70/0.15 96.99/0.27
|
28 |
+
Ministral-8B-Instruct-2410 5B~10B 56.28/0.51 55.10/0.51 68.83/0.58 58.24/0.51 43.66/0.54
|
29 |
GPT-J-6B 5B~10B 55.98/0.42 80.27/1.42 16.11/0.86 53.26/0.23 96.03/0.20
|
30 |
Baichuan2-7B-Chat 5B~10B 53.99/0.51 62.89/1.57 19.96/0.88 52.31/0.30 88.18/0.23
|
31 |
GLM-4-9B-Chat 5B~10B 50.03/0.15 50.07/0.13 99.31/0.22 44.12/9.01 0.52/0.04
|
data/subclass_gen.csv
CHANGED
@@ -5,14 +5,20 @@ Qwen2.5-72B-Instruct,>65B,0.6248,0.6318,0.5580,0.8125,0.7581,0.9309,0.3779,0.155
|
|
5 |
Qwen2-72B-Instruct,>65B,0.4969,0.4670,0.2029,0.6210,0.6897,0.4713,0.3983,0.0356,0.0085,0.5508,0.6602,0.3609,0.6984,0.7472,0.6237,0.6711,0.7073,0.5588,0.5013,0.4768,0.2114,0.4109,0.1184,0.0309,0.6349,0.6718,0.4834,0.4284,0.2565,0.0767
|
6 |
Opt-66B,>65B,0.4866,0.482,0.682,0.5174,0.5203,0.7258,0.5579,0.5338,0.8237,0.5646,0.5728,0.7868,0.5385,0.535,0.7659,0.5571,0.5309,0.8257,0.5414,0.5199,0.7954,0.5354,0.5181,0.7801,0.5376,0.515,0.7909,0.5079,0.5041,0.7185
|
7 |
Llama3-ChatQA-1.5-70B,>65B,0.6682,0.6617,0.6566,0.6859,0.6932,0.6922,0.6079,0.6187,0.5348,0.6548,0.7024,0.6342,0.6861,0.6945,0.6928,0.7029,0.6853,0.7281,0.6211,0.6242,0.5599,0.6105,0.6189,0.5397,0.7134,0.6873,0.7493,0.59,0.6072,0.4996
|
|
|
|
|
8 |
Yi-1.5-34B-Chat,~30B,0.66,0.6114,0.8339,0.7311,0.6644,0.9577,0.3309,0.2379,0.1626,0.6958,0.6708,0.8646,0.7046,0.6528,0.9053,0.7084,0.6383,0.9309,0.5928,0.5672,0.6961,0.4467,0.4308,0.3972,0.6956,0.6281,0.9097,0.5182,0.515,0.5425
|
9 |
Qwen2.5-32B-Instruct,~30B,0.6204,0.8741,0.2629,0.9049,0.9606,0.8489,0.5103,0.5470,0.0453,0.8192,0.9583,0.6983,0.8514,0.9560,0.7445,0.7823,0.9396,0.5931,0.5869,0.8351,0.1922,0.5244,0.6511,0.0699,0.8334,0.9475,0.6950,0.5157,0.6401,0.0644
|
10 |
Opt-30B,~30B,0.4672,0.4683,0.6648,0.5002,0.5082,0.7109,0.5044,0.4987,0.7354,0.5314,0.5517,0.7422,0.5108,0.5163,0.7304,0.5161,0.5039,0.7618,0.513,0.5009,0.7578,0.4956,0.4908,0.719,0.5119,0.4977,0.7583,0.4958,0.4955,0.7134
|
|
|
11 |
Baichuan2-13B-Chat,10B~20B,0.6337,0.6402,0.5755,0.7188,0.7164,0.7457,0.5185,0.5189,0.3417,0.7341,0.7487,0.7703,0.7033,0.7091,0.7143,0.6742,0.6712,0.6575,0.5657,0.5728,0.434,0.6151,0.6264,0.5371,0.6515,0.65,0.6089,0.5532,0.5707,0.414
|
12 |
Qwen1.5-14B-Chat,10B~20B,0.7099,0.6657,0.8141,0.7897,0.7205,0.9615,0.5669,0.5657,0.5226,0.7776,0.7373,0.9181,0.7571,0.7073,0.897,0.7862,0.7044,0.97,0.6421,0.6225,0.6757,0.5014,0.4893,0.3888,0.7563,0.6869,0.9116,0.5499,0.5538,0.4889
|
13 |
Ziya2-13B-Chat,10B~20B,0.5403,0.5272,0.5731,0.6597,0.6313,0.8034,0.3259,0.2145,0.1373,0.673,0.6631,0.8101,0.6526,0.6282,0.7886,0.5583,0.5437,0.6097,0.3987,0.3541,0.2823,0.529,0.5194,0.5497,0.5377,0.5208,0.5678,0.4567,0.4484,0.4035
|
14 |
InternLM2-Chat-20B,10B~20B,0.6819,0.7156,0.5781,0.7661,0.7819,0.7518,0.5506,0.5823,0.3134,0.8061,0.8182,0.8271,0.807,0.7993,0.832,0.8128,0.7876,0.8453,0.7037,0.7305,0.6224,0.6092,0.6548,0.4308,0.7815,0.7702,0.7821,0.5613,0.6058,0.3396
|
15 |
Opt-13B,10B~20B,0.4746,0.4724,0.637,0.5147,0.519,0.7014,0.5146,0.5059,0.7153,0.5333,0.5557,0.7126,0.5261,0.5278,0.7228,0.5187,0.506,0.7257,0.5232,0.5081,0.7367,0.5218,0.5094,0.7314,0.4956,0.4856,0.6828,0.4722,0.4773,0.6264
|
|
|
|
|
|
|
16 |
Gemma-1.1-7B-it,5B~10B,0.7849,0.7205,0.9139,0.8081,0.7454,0.9485,0.6024,0.6084,0.5413,0.7854,0.758,0.8894,0.8017,0.7436,0.9353,0.8215,0.7367,0.9884,0.6669,0.6543,0.673,0.5811,0.5858,0.4976,0.7831,0.7167,0.9127,0.6684,0.6638,0.6754
|
17 |
Qwen1.5-7B-Chat,5B~10B,0.6885,0.6347,0.8535,0.7677,0.6891,0.9938,0.6929,0.6404,0.8588,0.7791,0.7151,0.9869,0.7653,0.6889,0.988,0.7485,0.6659,0.9746,0.684,0.6317,0.8443,0.7267,0.6564,0.929,0.7473,0.662,0.9772,0.5545,0.5496,0.5778
|
18 |
Yi-1.5-9B-Chat,5B~10B,0.7025,0.6913,0.7058,0.7032,0.7106,0.707,0.4533,0.3925,0.2,0.6546,0.7097,0.6172,0.7209,0.7213,0.7419,0.8197,0.7508,0.9452,0.5595,0.5666,0.4131,0.4342,0.3378,0.1591,0.7626,0.7215,0.8306,0.4057,0.2654,0.1096
|
@@ -23,4 +29,7 @@ GLM-4-9B-Chat,5B~10B,0.7691,0.8562,0.6352,0.7669,0.868,0.6424,0.4801,0.3396,0.05
|
|
23 |
InternLM2-Chat-7B,5B~10B,0.53,0.5321,0.3028,0.6981,0.7292,0.6512,0.5182,0.5207,0.2824,0.6362,0.7192,0.5431,0.6717,0.7137,0.6002,0.6512,0.6763,0.551,0.5731,0.5951,0.3902,0.5205,0.5203,0.2849,0.6413,0.6626,0.5289,0.4783,0.4438,0.2061
|
24 |
Opt-6.7B,5B~10B,0.4717,0.4691,0.6091,0.5087,0.5153,0.6691,0.4931,0.4895,0.6491,0.5308,0.5556,0.6899,0.5215,0.5249,0.6922,0.4969,0.4902,0.6595,0.4803,0.4756,0.6266,0.488,0.4842,0.6406,0.4819,0.4741,0.6315,0.4627,0.4684,0.5853
|
25 |
Mistral-7B-Instruct-v0.3,5B~10B,0.7069,0.6749,0.7706,0.7521,0.7161,0.8533,0.5826,0.5868,0.5167,0.7142,0.7222,0.7711,0.7599,0.7205,0.8679,0.7956,0.7205,0.9509,0.6748,0.6547,0.7042,0.6139,0.6127,0.5802,0.7742,0.7074,0.9103,0.6388,0.6387,0.6313
|
26 |
-
Llama3-ChatQA-1.5-8B,5B~10B,0.6114,0.5657,0.8761,0.6276,0.5904,0.885,0.5978,0.5613,0.844,0.6056,0.6016,0.8128,0.6113,0.5825,0.8521,0.6365,0.5805,0.9258,0.6062,0.5625,0.8663,0.6034,0.5629,0.8569,0.6223,0.5694,0.903,0.5658,0.5447,0.7752
|
|
|
|
|
|
|
|
5 |
Qwen2-72B-Instruct,>65B,0.4969,0.4670,0.2029,0.6210,0.6897,0.4713,0.3983,0.0356,0.0085,0.5508,0.6602,0.3609,0.6984,0.7472,0.6237,0.6711,0.7073,0.5588,0.5013,0.4768,0.2114,0.4109,0.1184,0.0309,0.6349,0.6718,0.4834,0.4284,0.2565,0.0767
|
6 |
Opt-66B,>65B,0.4866,0.482,0.682,0.5174,0.5203,0.7258,0.5579,0.5338,0.8237,0.5646,0.5728,0.7868,0.5385,0.535,0.7659,0.5571,0.5309,0.8257,0.5414,0.5199,0.7954,0.5354,0.5181,0.7801,0.5376,0.515,0.7909,0.5079,0.5041,0.7185
|
7 |
Llama3-ChatQA-1.5-70B,>65B,0.6682,0.6617,0.6566,0.6859,0.6932,0.6922,0.6079,0.6187,0.5348,0.6548,0.7024,0.6342,0.6861,0.6945,0.6928,0.7029,0.6853,0.7281,0.6211,0.6242,0.5599,0.6105,0.6189,0.5397,0.7134,0.6873,0.7493,0.59,0.6072,0.4996
|
8 |
+
Llama-3.1-70B-Instruct,>65B,0.4845,0.3825,0.0896,0.5771,0.6976,0.3045,0.4546,0.2021,0.0359,0.6067,0.7722,0.3926,0.5946,0.7225,0.3403,0.5904,0.6813,0.3067,0.4817,0.3639,0.0828,0.4760,0.3471,0.0759,0.5340,0.5584,0.1851,0.4837,0.4207,0.1019
|
9 |
+
Llama-3.3-70B-Instruct,>65B,0.5045,0.4639,0.0849,0.5211,0.6327,0.1537,0.4943,0.4221,0.0718,0.5173,0.7089,0.1918,0.5728,0.7424,0.2569,0.5775,0.7071,0.2347,0.4964,0.4060,0.0668,0.4960,0.4244,0.0712,0.5183,0.5179,0.1065,0.4820,0.3636,0.0544
|
10 |
Yi-1.5-34B-Chat,~30B,0.66,0.6114,0.8339,0.7311,0.6644,0.9577,0.3309,0.2379,0.1626,0.6958,0.6708,0.8646,0.7046,0.6528,0.9053,0.7084,0.6383,0.9309,0.5928,0.5672,0.6961,0.4467,0.4308,0.3972,0.6956,0.6281,0.9097,0.5182,0.515,0.5425
|
11 |
Qwen2.5-32B-Instruct,~30B,0.6204,0.8741,0.2629,0.9049,0.9606,0.8489,0.5103,0.5470,0.0453,0.8192,0.9583,0.6983,0.8514,0.9560,0.7445,0.7823,0.9396,0.5931,0.5869,0.8351,0.1922,0.5244,0.6511,0.0699,0.8334,0.9475,0.6950,0.5157,0.6401,0.0644
|
12 |
Opt-30B,~30B,0.4672,0.4683,0.6648,0.5002,0.5082,0.7109,0.5044,0.4987,0.7354,0.5314,0.5517,0.7422,0.5108,0.5163,0.7304,0.5161,0.5039,0.7618,0.513,0.5009,0.7578,0.4956,0.4908,0.719,0.5119,0.4977,0.7583,0.4958,0.4955,0.7134
|
13 |
+
QwQ-32B-Preview,~30B,0.6837,0.7403,0.5470,0.8120,0.8219,0.8084,0.6060,0.6749,0.3914,0.7516,0.8198,0.6977,0.8121,0.8230,0.8081,0.8470,0.8208,0.8801,0.6113,0.6736,0.3973,0.6050,0.6700,0.3873,0.7492,0.7768,0.6783,0.4656,0.3791,0.1124
|
14 |
Baichuan2-13B-Chat,10B~20B,0.6337,0.6402,0.5755,0.7188,0.7164,0.7457,0.5185,0.5189,0.3417,0.7341,0.7487,0.7703,0.7033,0.7091,0.7143,0.6742,0.6712,0.6575,0.5657,0.5728,0.434,0.6151,0.6264,0.5371,0.6515,0.65,0.6089,0.5532,0.5707,0.414
|
15 |
Qwen1.5-14B-Chat,10B~20B,0.7099,0.6657,0.8141,0.7897,0.7205,0.9615,0.5669,0.5657,0.5226,0.7776,0.7373,0.9181,0.7571,0.7073,0.897,0.7862,0.7044,0.97,0.6421,0.6225,0.6757,0.5014,0.4893,0.3888,0.7563,0.6869,0.9116,0.5499,0.5538,0.4889
|
16 |
Ziya2-13B-Chat,10B~20B,0.5403,0.5272,0.5731,0.6597,0.6313,0.8034,0.3259,0.2145,0.1373,0.673,0.6631,0.8101,0.6526,0.6282,0.7886,0.5583,0.5437,0.6097,0.3987,0.3541,0.2823,0.529,0.5194,0.5497,0.5377,0.5208,0.5678,0.4567,0.4484,0.4035
|
17 |
InternLM2-Chat-20B,10B~20B,0.6819,0.7156,0.5781,0.7661,0.7819,0.7518,0.5506,0.5823,0.3134,0.8061,0.8182,0.8271,0.807,0.7993,0.832,0.8128,0.7876,0.8453,0.7037,0.7305,0.6224,0.6092,0.6548,0.4308,0.7815,0.7702,0.7821,0.5613,0.6058,0.3396
|
18 |
Opt-13B,10B~20B,0.4746,0.4724,0.637,0.5147,0.519,0.7014,0.5146,0.5059,0.7153,0.5333,0.5557,0.7126,0.5261,0.5278,0.7228,0.5187,0.506,0.7257,0.5232,0.5081,0.7367,0.5218,0.5094,0.7314,0.4956,0.4856,0.6828,0.4722,0.4773,0.6264
|
19 |
+
Mistral-Nemo-Instruct-2407,10B~20B,0.6375,0.6363,0.6018,0.6971,0.6973,0.7214,0.4741,0.4456,0.2722,0.6349,0.6873,0.6041,0.7122,0.7067,0.7508,0.7259,0.6960,0.7825,0.5252,0.5197,0.3718,0.4695,0.4343,0.2607,0.6126,0.6117,0.5492,0.4474,0.4009,0.2212
|
20 |
+
Phi-3-medium-4k-instruct,10B~20B,0.5533,0.5494,0.4889,0.5385,0.5594,0.4653,0.6034,0.6005,0.5922,0.5418,0.5993,0.4803,0.5866,0.6054,0.5590,0.5815,0.5780,0.5475,0.6178,0.6070,0.6217,0.6437,0.6287,0.6742,0.6028,0.5912,0.5893,0.5057,0.5054,0.3950
|
21 |
+
Phi-3-medium-128k-instruct,10B~20B,0.6379,0.6234,0.6581,0.6379,0.6437,0.6554,0.6504,0.6361,0.6823,0.5919,0.6413,0.5687,0.6431,0.6483,0.6654,0.6568,0.6374,0.6958,0.6632,0.6403,0.7087,0.6819,0.6546,0.7465,0.6796,0.6480,0.7433,0.5897,0.5935,0.5592
|
22 |
Gemma-1.1-7B-it,5B~10B,0.7849,0.7205,0.9139,0.8081,0.7454,0.9485,0.6024,0.6084,0.5413,0.7854,0.758,0.8894,0.8017,0.7436,0.9353,0.8215,0.7367,0.9884,0.6669,0.6543,0.673,0.5811,0.5858,0.4976,0.7831,0.7167,0.9127,0.6684,0.6638,0.6754
|
23 |
Qwen1.5-7B-Chat,5B~10B,0.6885,0.6347,0.8535,0.7677,0.6891,0.9938,0.6929,0.6404,0.8588,0.7791,0.7151,0.9869,0.7653,0.6889,0.988,0.7485,0.6659,0.9746,0.684,0.6317,0.8443,0.7267,0.6564,0.929,0.7473,0.662,0.9772,0.5545,0.5496,0.5778
|
24 |
Yi-1.5-9B-Chat,5B~10B,0.7025,0.6913,0.7058,0.7032,0.7106,0.707,0.4533,0.3925,0.2,0.6546,0.7097,0.6172,0.7209,0.7213,0.7419,0.8197,0.7508,0.9452,0.5595,0.5666,0.4131,0.4342,0.3378,0.1591,0.7626,0.7215,0.8306,0.4057,0.2654,0.1096
|
|
|
29 |
InternLM2-Chat-7B,5B~10B,0.53,0.5321,0.3028,0.6981,0.7292,0.6512,0.5182,0.5207,0.2824,0.6362,0.7192,0.5431,0.6717,0.7137,0.6002,0.6512,0.6763,0.551,0.5731,0.5951,0.3902,0.5205,0.5203,0.2849,0.6413,0.6626,0.5289,0.4783,0.4438,0.2061
|
30 |
Opt-6.7B,5B~10B,0.4717,0.4691,0.6091,0.5087,0.5153,0.6691,0.4931,0.4895,0.6491,0.5308,0.5556,0.6899,0.5215,0.5249,0.6922,0.4969,0.4902,0.6595,0.4803,0.4756,0.6266,0.488,0.4842,0.6406,0.4819,0.4741,0.6315,0.4627,0.4684,0.5853
|
31 |
Mistral-7B-Instruct-v0.3,5B~10B,0.7069,0.6749,0.7706,0.7521,0.7161,0.8533,0.5826,0.5868,0.5167,0.7142,0.7222,0.7711,0.7599,0.7205,0.8679,0.7956,0.7205,0.9509,0.6748,0.6547,0.7042,0.6139,0.6127,0.5802,0.7742,0.7074,0.9103,0.6388,0.6387,0.6313
|
32 |
+
Llama3-ChatQA-1.5-8B,5B~10B,0.6114,0.5657,0.8761,0.6276,0.5904,0.885,0.5978,0.5613,0.844,0.6056,0.6016,0.8128,0.6113,0.5825,0.8521,0.6365,0.5805,0.9258,0.6062,0.5625,0.8663,0.6034,0.5629,0.8569,0.6223,0.5694,0.903,0.5658,0.5447,0.7752
|
33 |
+
Ministral-8B-Instruct-2410,5B~10B,0.6447,0.6342,0.6442,0.7197,0.7001,0.7911,0.5176,0.5149,0.3869,0.6868,0.7082,0.7217,0.7326,0.7075,0.8161,0.7362,0.6919,0.8305,0.5742,0.5735,0.5003,0.4649,0.4306,0.2781,0.6894,0.6614,0.7369,0.5258,0.5313,0.4059
|
34 |
+
Phi-3-small-8k-instruct,5B~10B,0.7598,0.7484,0.7666,0.7738,0.7711,0.7936,0.7227,0.7317,0.6914,0.7477,0.7825,0.7432,0.7999,0.7827,0.8445,0.8204,0.7762,0.8907,0.7106,0.7202,0.6662,0.6027,0.6353,0.4468,0.7871,0.7581,0.8233,0.5930,0.6349,0.4325
|
35 |
+
Phi-3-small-128k-instruct,5B~10B,0.7158,0.7404,0.6454,0.6831,0.7398,0.5872,0.6751,0.7171,0.5633,0.6057,0.7155,0.4565,0.6783,0.7379,0.5785,0.8131,0.7893,0.8433,0.6832,0.7183,0.5779,0.6189,0.6671,0.4487,0.7353,0.7481,0.6836,0.5863,0.6398,0.3873
|
data/subclass_per.csv
CHANGED
@@ -5,14 +5,20 @@ Qwen2.5-72B-Instruct,>65B,0.6292,0.6414,0.5480,0.8411,0.7760,0.9689,0.3631,0.028
|
|
5 |
Qwen2-72B-Instruct,>65B,0.6587,0.5982,0.9159,0.7064,0.6373,0.9870,0.4112,0.4039,0.4090,0.6611,0.6383,0.8691,0.6920,0.6315,0.9577,0.6948,0.6175,0.9884,0.6106,0.5703,0.8181,0.4184,0.4103,0.4236,0.6658,0.5992,0.9347,0.4887,0.4879,0.5650
|
6 |
Opt-66B,>65B,0.645,0.5831,0.9572,0.3981,0.417,0.4471,0.6667,0.5971,0.9953,0.6232,0.6095,0.8551,0.4854,0.4984,0.6176,0.652,0.5874,0.9698,0.6511,0.5859,0.9706,0.6604,0.5926,0.9853,0.6556,0.586,0.9846,0.655,0.5943,0.9665
|
7 |
Llama3-ChatQA-1.5-70B,>65B,0.3666,0.2082,0.1069,0.339,0.169,0.0752,0.3147,0.0148,0.0059,0.2947,0.075,0.0261,0.7758,0.7167,0.9293,0.5528,0.5482,0.4877,0.3396,0.111,0.0507,0.3207,0.0374,0.0156,0.4392,0.3806,0.2524,0.3214,0.0614,0.0253
|
|
|
|
|
8 |
Yi-1.5-34B-Chat,~30B,0.7139,0.8341,0.5176,0.7722,0.8735,0.6482,0.475,0.2581,0.0357,0.7162,0.8717,0.5603,0.6206,0.7912,0.353,0.8816,0.8938,0.8601,0.6412,0.7813,0.3672,0.497,0.4306,0.0769,0.8472,0.8832,0.7889,0.4818,0.3646,0.0576
|
9 |
Qwen2.5-32B-Instruct,~30B,0.6749,0.6366,0.7789,0.7893,0.7099,0.9938,0.4372,0.4025,0.2943,0.7921,0.7323,0.9739,0.7723,0.7036,0.9599,0.7702,0.6873,0.9727,0.5920,0.5774,0.6092,0.4358,0.3969,0.2906,0.7404,0.6695,0.9160,0.4640,0.4506,0.3514
|
10 |
Opt-30B,~30B,0.5831,0.5754,0.5565,0.3952,0.338,0.1915,0.6784,0.6507,0.7506,0.5798,0.6281,0.5559,0.357,0.2405,0.1185,0.406,0.3224,0.1945,0.6203,0.6061,0.633,0.6188,0.6076,0.6293,0.6031,0.5886,0.5976,0.6244,0.6184,0.6415
|
|
|
11 |
Baichuan2-13B-Chat,10B~20B,0.7346,0.6715,0.8932,0.7703,0.7043,0.9491,0.6303,0.6129,0.6785,0.7435,0.7152,0.8777,0.779,0.7088,0.9649,0.7677,0.6883,0.9601,0.6763,0.6388,0.7738,0.6359,0.6149,0.6904,0.7096,0.6554,0.8436,0.7306,0.6762,0.8788
|
12 |
Qwen1.5-14B-Chat,10B~20B,0.625,0.5683,0.964,0.6549,0.5977,0.9932,0.5983,0.5571,0.9038,0.6561,0.6193,0.9535,0.6592,0.6005,0.9994,0.6382,0.5759,0.9897,0.5579,0.53,0.8275,0.5009,0.4938,0.7077,0.6256,0.566,0.9705,0.6063,0.5643,0.914
|
13 |
Ziya2-13B-Chat,10B~20B,0.6322,0.6632,0.502,0.381,0.0822,0.0212,0.4263,0.2557,0.086,0.4352,0.4474,0.1651,0.612,0.6721,0.4744,0.812,0.7741,0.8691,0.4904,0.4516,0.2102,0.5309,0.5403,0.2964,0.7186,0.7235,0.6777,0.4811,0.4512,0.2021
|
14 |
InternLM2-Chat-20B,10B~20B,0.5184,0.5912,0.0441,0.4754,0.0222,0.0006,0.4929,0.0222,0.0006,0.4744,0.7043,0.0573,0.605,0.904,0.256,0.5265,0.6774,0.0625,0.5689,0.8292,0.146,0.5046,0.4073,0.0202,0.7142,0.9352,0.44,0.498,0.4041,0.0196
|
15 |
Opt-13B,10B~20B,0.5011,0.0392,0.0015,0.4792,0.0695,0.0018,0.4958,0,0,0.4492,0.237,0.0055,0.4897,0.5438,0.0249,0.4996,0.0333,0.0006,0.5037,0.1931,0.0055,0.5454,0.8065,0.0965,0.5155,0.499,0.0228,0.5016,0.4815,0.0203
|
|
|
|
|
|
|
16 |
Gemma-1.1-7B-it,5B~10B,0.6885,0.6193,0.9389,0.7201,0.6502,0.9795,0.6709,0.6133,0.8985,0.7171,0.6709,0.9421,0.5993,0.5861,0.7426,0.7164,0.634,0.9953,0.6316,0.5872,0.8235,0.5207,0.5098,0.595,0.6874,0.616,0.9415,0.6164,0.5853,0.7856
|
17 |
Qwen1.5-7B-Chat,5B~10B,0.6415,0.5933,0.8439,0.7295,0.6542,0.9987,0.5495,0.5352,0.6535,0.7415,0.6808,0.9875,0.7286,0.6545,0.9955,0.7167,0.6339,0.9966,0.6122,0.5749,0.784,0.4866,0.4788,0.5265,0.6887,0.6165,0.9449,0.4276,0.4219,0.4072
|
18 |
Yi-1.5-9B-Chat,5B~10B,0.7089,0.8612,0.4825,0.5418,0.7129,0.1741,0.4846,0.2932,0.0308,0.5376,0.7743,0.2115,0.6185,0.8236,0.3254,0.818,0.9011,0.7057,0.5819,0.7416,0.2207,0.4893,0.3279,0.0365,0.7959,0.8937,0.6572,0.477,0.2414,0.0233
|
@@ -23,4 +29,7 @@ GLM-4-9B-Chat,5B~10B,0.4974,0.4928,0.9986,0.5202,0.5158,0.9994,0.4984,0.4957,0.9
|
|
23 |
InternLM2-Chat-7B,5B~10B,0.4988,0,0,0.4767,0,0,0.4943,0,0,0.4453,0.0513,0.0011,0.5829,0.8965,0.21,0.4977,0,0,0.4997,0.0278,0.0007,0.4964,0,0,0.5026,0,0,0.4901,0.0278,0.0006
|
24 |
Opt-6.7B,5B~10B,0.5189,0.5038,0.9645,0.3756,0.4266,0.6456,0.5227,0.5083,0.9638,0.549,0.5504,0.9314,0.2606,0.3276,0.4205,0.4833,0.4847,0.8892,0.5274,0.508,0.9831,0.5244,0.508,0.971,0.5105,0.4973,0.9551,0.5322,0.5159,0.9757
|
25 |
Mistral-7B-Instruct-v0.3,5B~10B,0.4091,0.3399,0.2241,0.3013,0.0672,0.0286,0.3093,0.0548,0.0246,0.3554,0.3176,0.1618,0.4671,0.473,0.3538,0.62,0.6022,0.655,0.432,0.3832,0.2701,0.3362,0.1517,0.0771,0.6338,0.6081,0.6844,0.3814,0.2943,0.1744
|
26 |
-
Llama3-ChatQA-1.5-8B,5B~10B,0.387,0.2816,0.1665,0.3232,0.1355,0.0603,0.3054,0.011,0.0045,0.292,0.0948,0.0354,0.7946,0.7193,0.9821,0.5375,0.5306,0.4746,0.3702,0.2367,0.1312,0.318,0.0621,0.0276,0.4823,0.4562,0.3594,0.3398,0.1632,0.0793
|
|
|
|
|
|
|
|
5 |
Qwen2-72B-Instruct,>65B,0.6587,0.5982,0.9159,0.7064,0.6373,0.9870,0.4112,0.4039,0.4090,0.6611,0.6383,0.8691,0.6920,0.6315,0.9577,0.6948,0.6175,0.9884,0.6106,0.5703,0.8181,0.4184,0.4103,0.4236,0.6658,0.5992,0.9347,0.4887,0.4879,0.5650
|
6 |
Opt-66B,>65B,0.645,0.5831,0.9572,0.3981,0.417,0.4471,0.6667,0.5971,0.9953,0.6232,0.6095,0.8551,0.4854,0.4984,0.6176,0.652,0.5874,0.9698,0.6511,0.5859,0.9706,0.6604,0.5926,0.9853,0.6556,0.586,0.9846,0.655,0.5943,0.9665
|
7 |
Llama3-ChatQA-1.5-70B,>65B,0.3666,0.2082,0.1069,0.339,0.169,0.0752,0.3147,0.0148,0.0059,0.2947,0.075,0.0261,0.7758,0.7167,0.9293,0.5528,0.5482,0.4877,0.3396,0.111,0.0507,0.3207,0.0374,0.0156,0.4392,0.3806,0.2524,0.3214,0.0614,0.0253
|
8 |
+
Llama-3.1-70B-Instruct,>65B,0.4670,0.4105,0.2107,0.3766,0.1681,0.0560,0.3856,0.1439,0.0505,0.3460,0.1387,0.0392,0.4036,0.2873,0.1107,0.3872,0.1394,0.0487,0.4967,0.4715,0.2711,0.4070,0.2331,0.0910,0.4985,0.4691,0.2716,0.6337,0.6553,0.5548
|
9 |
+
Llama-3.3-70B-Instruct,>65B,0.3996,0.3526,0.2759,0.2923,0.1430,0.0771,0.3029,0.1420,0.0825,0.2624,0.1066,0.0486,0.3657,0.3253,0.2213,0.3305,0.2121,0.1358,0.4583,0.4388,0.3966,0.3156,0.1750,0.1062,0.4510,0.4249,0.3802,0.5813,0.5696,0.6459
|
10 |
Yi-1.5-34B-Chat,~30B,0.7139,0.8341,0.5176,0.7722,0.8735,0.6482,0.475,0.2581,0.0357,0.7162,0.8717,0.5603,0.6206,0.7912,0.353,0.8816,0.8938,0.8601,0.6412,0.7813,0.3672,0.497,0.4306,0.0769,0.8472,0.8832,0.7889,0.4818,0.3646,0.0576
|
11 |
Qwen2.5-32B-Instruct,~30B,0.6749,0.6366,0.7789,0.7893,0.7099,0.9938,0.4372,0.4025,0.2943,0.7921,0.7323,0.9739,0.7723,0.7036,0.9599,0.7702,0.6873,0.9727,0.5920,0.5774,0.6092,0.4358,0.3969,0.2906,0.7404,0.6695,0.9160,0.4640,0.4506,0.3514
|
12 |
Opt-30B,~30B,0.5831,0.5754,0.5565,0.3952,0.338,0.1915,0.6784,0.6507,0.7506,0.5798,0.6281,0.5559,0.357,0.2405,0.1185,0.406,0.3224,0.1945,0.6203,0.6061,0.633,0.6188,0.6076,0.6293,0.6031,0.5886,0.5976,0.6244,0.6184,0.6415
|
13 |
+
QwQ-32B-Preview,~30B,0.5231,0.5061,0.9839,0.5519,0.5328,1.0000,0.4141,0.4443,0.7537,0.5814,0.5650,0.9989,0.5529,0.5340,0.9993,0.5318,0.5111,0.9993,0.5083,0.4978,0.9542,0.4392,0.4593,0.8080,0.5238,0.5042,0.9922,0.5269,0.5128,0.9743
|
14 |
Baichuan2-13B-Chat,10B~20B,0.7346,0.6715,0.8932,0.7703,0.7043,0.9491,0.6303,0.6129,0.6785,0.7435,0.7152,0.8777,0.779,0.7088,0.9649,0.7677,0.6883,0.9601,0.6763,0.6388,0.7738,0.6359,0.6149,0.6904,0.7096,0.6554,0.8436,0.7306,0.6762,0.8788
|
15 |
Qwen1.5-14B-Chat,10B~20B,0.625,0.5683,0.964,0.6549,0.5977,0.9932,0.5983,0.5571,0.9038,0.6561,0.6193,0.9535,0.6592,0.6005,0.9994,0.6382,0.5759,0.9897,0.5579,0.53,0.8275,0.5009,0.4938,0.7077,0.6256,0.566,0.9705,0.6063,0.5643,0.914
|
16 |
Ziya2-13B-Chat,10B~20B,0.6322,0.6632,0.502,0.381,0.0822,0.0212,0.4263,0.2557,0.086,0.4352,0.4474,0.1651,0.612,0.6721,0.4744,0.812,0.7741,0.8691,0.4904,0.4516,0.2102,0.5309,0.5403,0.2964,0.7186,0.7235,0.6777,0.4811,0.4512,0.2021
|
17 |
InternLM2-Chat-20B,10B~20B,0.5184,0.5912,0.0441,0.4754,0.0222,0.0006,0.4929,0.0222,0.0006,0.4744,0.7043,0.0573,0.605,0.904,0.256,0.5265,0.6774,0.0625,0.5689,0.8292,0.146,0.5046,0.4073,0.0202,0.7142,0.9352,0.44,0.498,0.4041,0.0196
|
18 |
Opt-13B,10B~20B,0.5011,0.0392,0.0015,0.4792,0.0695,0.0018,0.4958,0,0,0.4492,0.237,0.0055,0.4897,0.5438,0.0249,0.4996,0.0333,0.0006,0.5037,0.1931,0.0055,0.5454,0.8065,0.0965,0.5155,0.499,0.0228,0.5016,0.4815,0.0203
|
19 |
+
Mistral-Nemo-Instruct-2407,10B~20B,0.6992,0.6359,0.8960,0.7518,0.6773,0.9826,0.6421,0.6067,0.7767,0.7290,0.6896,0.9121,0.7377,0.6719,0.9542,0.7482,0.6611,0.9959,0.6396,0.6014,0.7754,0.6045,0.5803,0.7019,0.7246,0.6464,0.9529,0.4910,0.4881,0.4717
|
20 |
+
Phi-3-medium-4k-instruct,10B~20B,0.8162,0.7447,0.9484,0.3950,0.2748,0.1126,0.8368,0.7558,0.9878,0.5763,0.6486,0.4809,0.6431,0.6695,0.5981,0.8403,0.7549,0.9973,0.8092,0.7414,0.9343,0.8263,0.7504,0.9679,0.8352,0.7499,0.9896,0.6361,0.6499,0.5818
|
21 |
+
Phi-3-medium-128k-instruct,10B~20B,0.8024,0.7318,0.9391,0.3592,0.1596,0.0598,0.8232,0.7434,0.9790,0.5228,0.5910,0.3977,0.5699,0.6022,0.4725,0.8293,0.7436,0.9939,0.7813,0.7222,0.8963,0.8009,0.7328,0.9351,0.8260,0.7393,0.9898,0.6525,0.6565,0.6327
|
22 |
Gemma-1.1-7B-it,5B~10B,0.6885,0.6193,0.9389,0.7201,0.6502,0.9795,0.6709,0.6133,0.8985,0.7171,0.6709,0.9421,0.5993,0.5861,0.7426,0.7164,0.634,0.9953,0.6316,0.5872,0.8235,0.5207,0.5098,0.595,0.6874,0.616,0.9415,0.6164,0.5853,0.7856
|
23 |
Qwen1.5-7B-Chat,5B~10B,0.6415,0.5933,0.8439,0.7295,0.6542,0.9987,0.5495,0.5352,0.6535,0.7415,0.6808,0.9875,0.7286,0.6545,0.9955,0.7167,0.6339,0.9966,0.6122,0.5749,0.784,0.4866,0.4788,0.5265,0.6887,0.6165,0.9449,0.4276,0.4219,0.4072
|
24 |
Yi-1.5-9B-Chat,5B~10B,0.7089,0.8612,0.4825,0.5418,0.7129,0.1741,0.4846,0.2932,0.0308,0.5376,0.7743,0.2115,0.6185,0.8236,0.3254,0.818,0.9011,0.7057,0.5819,0.7416,0.2207,0.4893,0.3279,0.0365,0.7959,0.8937,0.6572,0.477,0.2414,0.0233
|
|
|
29 |
InternLM2-Chat-7B,5B~10B,0.4988,0,0,0.4767,0,0,0.4943,0,0,0.4453,0.0513,0.0011,0.5829,0.8965,0.21,0.4977,0,0,0.4997,0.0278,0.0007,0.4964,0,0,0.5026,0,0,0.4901,0.0278,0.0006
|
30 |
Opt-6.7B,5B~10B,0.5189,0.5038,0.9645,0.3756,0.4266,0.6456,0.5227,0.5083,0.9638,0.549,0.5504,0.9314,0.2606,0.3276,0.4205,0.4833,0.4847,0.8892,0.5274,0.508,0.9831,0.5244,0.508,0.971,0.5105,0.4973,0.9551,0.5322,0.5159,0.9757
|
31 |
Mistral-7B-Instruct-v0.3,5B~10B,0.4091,0.3399,0.2241,0.3013,0.0672,0.0286,0.3093,0.0548,0.0246,0.3554,0.3176,0.1618,0.4671,0.473,0.3538,0.62,0.6022,0.655,0.432,0.3832,0.2701,0.3362,0.1517,0.0771,0.6338,0.6081,0.6844,0.3814,0.2943,0.1744
|
32 |
+
Llama3-ChatQA-1.5-8B,5B~10B,0.387,0.2816,0.1665,0.3232,0.1355,0.0603,0.3054,0.011,0.0045,0.292,0.0948,0.0354,0.7946,0.7193,0.9821,0.5375,0.5306,0.4746,0.3702,0.2367,0.1312,0.318,0.0621,0.0276,0.4823,0.4562,0.3594,0.3398,0.1632,0.0793
|
33 |
+
Ministral-8B-Instruct-2410,5B~10B,0.6080,0.5720,0.7836,0.7143,0.6458,0.9763,0.3260,0.2659,0.2081,0.6623,0.6438,0.8483,0.7052,0.6425,0.9570,0.7069,0.6278,0.9849,0.5197,0.5064,0.6030,0.3152,0.2420,0.1855,0.6558,0.5975,0.8847,0.4132,0.4054,0.3860
|
34 |
+
Phi-3-small-8k-instruct,5B~10B,0.6365,0.5771,0.9543,0.4834,0.4955,0.6276,0.6479,0.5862,0.9722,0.6323,0.6122,0.8839,0.6153,0.5820,0.8846,0.6563,0.5881,0.9939,0.5791,0.5445,0.8380,0.6012,0.5593,0.8793,0.6322,0.5723,0.9513,0.4856,0.4872,0.6404
|
35 |
+
Phi-3-small-128k-instruct,5B~10B,0.6085,0.5851,0.6810,0.3324,0.2336,0.1343,0.7347,0.6638,0.9355,0.6062,0.6315,0.6625,0.6078,0.6056,0.6736,0.7148,0.6513,0.8975,0.6468,0.6108,0.7597,0.7331,0.6615,0.9338,0.7076,0.6437,0.8871,0.4432,0.4270,0.3467
|