Junetheriver commited on
Commit
fe35dbb
1 Parent(s): 00eb906

update leaderboard 2024-09-06

Browse files
app.py CHANGED
@@ -40,7 +40,11 @@ def process_mc_df(df, shot=None):
40
  # 将zero_naive, zero_self_con, zero_cot, zero_cot_self_con, few_naive, few_self_con, few_cot, few_cot_self_con列重新组织成MultiIndex,一层为Zeroshot, Fewshot,一层为Naive, Self-Consistency, CoT, CoT+Self-Consistency
41
  df = df.set_index("Model")
42
  # df = df.stack().unstack()
43
- df.columns = pd.MultiIndex.from_tuples([("Zeroshot", "Naive"), ("Zeroshot", "SC"), ("Zeroshot", "CoT"), ("Zeroshot", "CoT+SC"), ("Fewshot", "Naive"), ("Fewshot", "SC"), ("Fewshot", "CoT"), ("Fewshot", "CoT+SC")])
 
 
 
 
44
  # 保留shot的列,比如如果shot=Zeroshot那么只有Zeroshot的列会被保留
45
  if shot:
46
  df = df[shot]
 
40
  # 将zero_naive, zero_self_con, zero_cot, zero_cot_self_con, few_naive, few_self_con, few_cot, few_cot_self_con列重新组织成MultiIndex,一层为Zeroshot, Fewshot,一层为Naive, Self-Consistency, CoT, CoT+Self-Consistency
41
  df = df.set_index("Model")
42
  # df = df.stack().unstack()
43
+ try:
44
+ df.columns = pd.MultiIndex.from_tuples([("Zeroshot", "Naive"), ("Zeroshot", "SC"), ("Zeroshot", "CoT"), ("Zeroshot", "CoT+SC"), ("Fewshot", "Naive"), ("Fewshot", "SC"), ("Fewshot", "CoT"), ("Fewshot", "CoT+SC")])
45
+ except:
46
+ print(df)
47
+ raise
48
  # 保留shot的列,比如如果shot=Zeroshot那么只有Zeroshot的列会被保留
49
  if shot:
50
  df = df[shot]
data_v2/bosc_zh_mc_gen.csv CHANGED
@@ -1,22 +1,22 @@
1
- name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
- Baichuan2-13B-Chat,37.5,40.0,47.5,52.5,37.5,37.5,42.5,45.0
3
- Chatglm3-6B,35.0,35.0,50.0,50.0,47.5,47.5,45.0,45.0
4
- Devops-Model-14B-Chat,35.0,27.5,37.5,52.5,50.0,50.0,55.0,62.5
5
- Ernie-Bot-4.0,57.5,57.5,60.0,60.0,52.5,52.5,57.5,57.5
6
- Gpt-3.5-Turbo,50.0,47.5,55.0,55.0,40.0,40.0,50.0,55.0
7
- GPT-4,57.5,57.5,57.5,57.5,52.5,52.5,62.5,62.5
8
- Internlm2-Chat-20B,47.5,47.5,,,47.5,47.5,,
9
- Internlm2-Chat-7B,60.0,60.0,57.5,57.5,55.0,55.0,62.5,62.5
10
- Llama-2-13B,42.5,42.5,50.0,50.0,50.0,50.0,42.5,42.5
11
- Llama-2-70B-Chat,0.0,0.0,57.5,57.5,25.0,25.0,45.0,45.0
12
- Llama-2-7B,32.5,32.5,45.0,45.0,45.0,45.0,45.0,45.0
13
- Mistral-7B,0.0,0.0,37.5,37.5,20.0,20.0,50.0,50.0
14
- Qwen-14B-Chat,47.5,45.0,50.0,47.5,50.0,47.5,55.0,57.5
15
- Qwen-72B-Chat,50.0,50.0,47.5,47.5,45.0,45.0,60.0,60.0
16
- Yi-34B-Chat,55.0,55.0,60.0,67.5,50.0,50.0,52.5,55.0
17
- Claude-3-Opus,,72.85714285714286,,,,,,
18
- gemma_2b,37.5,37.5,40.0,40.0,32.5,32.5,40.0,40.0
19
- gemma_7b,32.5,32.5,62.5,62.5,40.0,40.0,50.0,50.0
20
- Meta-Llama-3-8B-Instruct,,52.85714285714286,,47.14285714285714,,52.85714285714286,,30.0
21
- Qwen1.5-14B-Base,47.5,47.5,50.0,50.0,47.5,47.5,45.0,45.0
22
- Qwen1.5-14B-Chat,45.0,47.5,60.0,72.5,52.5,55.0,60.0,60.0
 
1
+ name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
2
+ Baichuan2-13B-Chat,40.0,52.5,37.5,45.0
3
+ Chatglm3-6B,35.0,50.0,47.5,45.0
4
+ Devops-Model-14B-Chat,27.5,52.5,50.0,62.5
5
+ Ernie-Bot-4.0,57.5,60.0,52.5,57.5
6
+ Gpt-3.5-Turbo,47.5,55.0,40.0,55.0
7
+ GPT-4,57.5,57.5,52.5,62.5
8
+ Internlm2-Chat-20B,47.5,,47.5,
9
+ Internlm2-Chat-7B,60.0,57.5,55.0,62.5
10
+ Llama-2-13B,42.5,50.0,50.0,42.5
11
+ Llama-2-70B-Chat,0.0,57.5,25.0,45.0
12
+ Llama-2-7B,32.5,45.0,45.0,45.0
13
+ Mistral-7B,0.0,37.5,20.0,50.0
14
+ Qwen-14B-Chat,45.0,47.5,47.5,57.5
15
+ Qwen-72B-Chat,50.0,47.5,45.0,60.0
16
+ Yi-34B-Chat,55.0,67.5,50.0,55.0
17
+ Claude-3-Opus,72.85714285714286,,,
18
+ gemma_2b,37.5,40.0,32.5,40.0
19
+ gemma_7b,32.5,62.5,40.0,50.0
20
+ Meta-Llama-3-8B-Instruct,52.85714285714286,47.14285714285714,52.85714285714286,30.0
21
+ Qwen1.5-14B-Base,47.5,50.0,47.5,45.0
22
+ Qwen1.5-14B-Chat,47.5,72.5,55.0,60.0
data_v2/gtja_zh_mc_gen.csv CHANGED
@@ -1,22 +1,22 @@
1
- name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
- Baichuan2-13B-Chat,41.76,41.76,53.85,60.44,38.46,38.46,49.45,51.65
3
- Chatglm3-6B,43.95604396,43.95604396,53.84615385,53.84615385,43.95604396,43.95604396,47.25274725,47.25274725
4
- Devops-Model-14B-Chat,41.76,38.46,45.05,49.45,61.54,59.34,52.75,63.74
5
- Ernie-Bot-4.0,68.13,68.13,64.84,64.84,65.93,65.93,68.13,68.13
6
- Gpt-3.5-Turbo,49.45,52.75,59.34,62.64,47.25,52.75,57.14,58.24
7
- GPT-4,68.13,68.13,67.03,67.03,70.33,70.33,71.43,71.43
8
- Internlm2-Chat-20B,56.04395604,56.04395604,,,65.93406593,65.93406593,,
9
- Internlm2-Chat-7B,56.04395604,56.04395604,59.34065934,59.34065934,54.94505495,54.94505495,51.64835165,51.64835165
10
- Llama-2-13B,30.77,30.77,47.25,47.25,47.25,47.25,42.86,42.86
11
- Llama-2-70B-Chat,6.59,6.59,48.35,48.35,19.78,19.78,49.45,49.45
12
- Llama-2-7B,28.57,28.57,45.05,45.05,42.86,42.86,45.05,45.05
13
- Mistral-7B,5.49,5.49,47.25,47.25,14.29,14.29,38.46,38.46
14
- Qwen-14B-Chat,47.25,47.25,53.85,54.95,54.95,54.95,59.34,61.54
15
- Qwen-72B-Chat,71.43,71.43,67.03,67.03,70.33,70.33,74.73,74.73
16
- Yi-34B-Chat,71.43,74.73,71.43,73.63,69.23,70.33,49.45,47.25
17
- Claude-3-Opus,,41.508438818565395,,,,,,
18
- gemma_2b,30.76923,30.76923,43.95604,43.95604,32.96703,32.96703,29.67033,29.67033
19
- gemma_7b,29.67033,29.67033,56.04396,56.04396,34.06593,34.06593,50.54945,50.54945
20
- Meta-Llama-3-8B-Instruct,,36.550632911392405,,38.08016877637131,,43.24894514767932,,34.28270042194093
21
- Qwen1.5-14B-Base,53.84615,53.84615,63.73626,63.73626,68.13187,68.13187,42.85714,42.85714
22
- Qwen1.5-14B-Chat,56.04396,54.94505,67.03297,68.13187,59.34066,57.14286,60.43956,62.63736
 
1
+ name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
2
+ Baichuan2-13B-Chat,41.76,60.44,38.46,51.65
3
+ Chatglm3-6B,43.95604396,53.84615385,43.95604396,47.25274725
4
+ Devops-Model-14B-Chat,38.46,49.45,59.34,63.74
5
+ Ernie-Bot-4.0,68.13,64.84,65.93,68.13
6
+ Gpt-3.5-Turbo,52.75,62.64,52.75,58.24
7
+ GPT-4,68.13,67.03,70.33,71.43
8
+ Internlm2-Chat-20B,56.04395604,,65.93406593,
9
+ Internlm2-Chat-7B,56.04395604,59.34065934,54.94505495,51.64835165
10
+ Llama-2-13B,30.77,47.25,47.25,42.86
11
+ Llama-2-70B-Chat,6.59,48.35,19.78,49.45
12
+ Llama-2-7B,28.57,45.05,42.86,45.05
13
+ Mistral-7B,5.49,47.25,14.29,38.46
14
+ Qwen-14B-Chat,47.25,54.95,54.95,61.54
15
+ Qwen-72B-Chat,71.43,67.03,70.33,74.73
16
+ Yi-34B-Chat,74.73,73.63,70.33,47.25
17
+ Claude-3-Opus,41.508438818565395,,,
18
+ gemma_2b,30.76923,43.95604,32.96703,29.67033
19
+ gemma_7b,29.67033,56.04396,34.06593,50.54945
20
+ Meta-Llama-3-8B-Instruct,36.550632911392405,38.08016877637131,43.24894514767932,34.28270042194093
21
+ Qwen1.5-14B-Base,53.84615,63.73626,68.13187,42.85714
22
+ Qwen1.5-14B-Chat,54.94505,68.13187,57.14286,62.63736
data_v2/huaweicloud_zh_mc_gen.csv CHANGED
@@ -1,22 +1,22 @@
1
- name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
- Baichuan2-13B-Chat,6.67,10.0,23.33,23.33,16.67,20.0,26.67,30.0
3
- Chatglm3-6B,13.33333333,13.33333333,16.66666667,16.66666667,6.666666667,6.666666667,13.33333333,13.33333333
4
- Devops-Model-14B-Chat,16.67,16.67,33.33,13.33,40.0,40.0,20.0,23.33
5
- Ernie-Bot-4.0,16.67,16.67,20.0,20.0,36.67,36.67,23.33,23.33
6
- Gpt-3.5-Turbo,13.33,13.33,20.0,26.67,20.0,20.0,16.67,23.33
7
- GPT-4,20.0,20.0,20.0,20.0,43.33,43.33,46.67,46.67
8
- Internlm2-Chat-20B,13.33333333,13.33333333,20.0,20.0,16.66666667,16.66666667,,
9
- Internlm2-Chat-7B,43.33333333,43.33333333,23.33333333,23.33333333,30.0,30.0,40.0,40.0
10
- Llama-2-13B,10.0,10.0,20.0,20.0,26.67,26.67,13.33,13.33
11
- Llama-2-70B-Chat,3.33,3.33,20.0,20.0,23.33,23.33,16.67,16.67
12
- Llama-2-7B,10.0,10.0,26.67,26.67,16.67,16.67,33.33,33.33
13
- Mistral-7B,0.0,0.0,23.33,23.33,0.0,0.0,16.67,16.67
14
- Qwen-14B-Chat,13.33,13.33,20.0,26.67,40.0,30.0,26.67,33.33
15
- Qwen-72B-Chat,36.67,36.67,33.33,33.33,43.33,43.33,33.33,36.67
16
- Yi-34B-Chat,36.67,40.0,36.67,30.0,50.0,46.67,30.0,43.33
17
- Claude-3-Opus,,55.0,,,,,,
18
- gemma_2b,26.66667,26.66667,10.0,10.0,26.66667,26.66667,20.0,20.0
19
- gemma_7b,3.333333,3.333333,23.33333,23.33333,13.33333,13.33333,30.0,30.0
20
- Meta-Llama-3-8B-Instruct,,27.5,,22.5,,30.0,,30.0
21
- Qwen1.5-14B-Base,20.0,20.0,33.33333,33.33333,20.0,20.0,30.0,30.0
22
- Qwen1.5-14B-Chat,23.33333,26.66667,13.33333,13.33333,26.66667,26.66667,20.0,30.0
 
1
+ name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
2
+ Baichuan2-13B-Chat,10.0,23.33,20.0,30.0
3
+ Chatglm3-6B,13.33333333,16.66666667,6.666666667,13.33333333
4
+ Devops-Model-14B-Chat,16.67,13.33,40.0,23.33
5
+ Ernie-Bot-4.0,16.67,20.0,36.67,23.33
6
+ Gpt-3.5-Turbo,13.33,26.67,20.0,23.33
7
+ GPT-4,20.0,20.0,43.33,46.67
8
+ Internlm2-Chat-20B,13.33333333,20.0,16.66666667,
9
+ Internlm2-Chat-7B,43.33333333,23.33333333,30.0,40.0
10
+ Llama-2-13B,10.0,20.0,26.67,13.33
11
+ Llama-2-70B-Chat,3.33,20.0,23.33,16.67
12
+ Llama-2-7B,10.0,26.67,16.67,33.33
13
+ Mistral-7B,0.0,23.33,0.0,16.67
14
+ Qwen-14B-Chat,13.33,26.67,30.0,33.33
15
+ Qwen-72B-Chat,36.67,33.33,43.33,36.67
16
+ Yi-34B-Chat,40.0,30.0,46.67,43.33
17
+ Claude-3-Opus,55.0,,,
18
+ gemma_2b,26.66667,10.0,26.66667,20.0
19
+ gemma_7b,3.333333,23.33333,13.33333,30.0
20
+ Meta-Llama-3-8B-Instruct,27.5,22.5,30.0,30.0
21
+ Qwen1.5-14B-Base,20.0,33.33333,20.0,30.0
22
+ Qwen1.5-14B-Chat,26.66667,13.33333,26.66667,30.0
data_v2/inspur_en_mc_gen.csv CHANGED
@@ -1,8 +1,8 @@
1
- name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
2
- Gpt-4,85.71428571428571,87.75510204081633,90.47619047619048,91.15646258503402
3
- GPT-4o,89.79591836734694,90.47619047619048,91.15646258503402,92.51700680272108
4
- Baichuan2-7B-Chat,44.89795918367347,66.66666666666666,28.57142857142857,50.34013605442177
5
- Claude-3-Opus,87.75510204081633,89.1156462585034,91.15646258503402,88.43537414965986
6
- Qwen2-0.5B-Instruct,,53.06122448979592,,52.38095238095239
7
- Qwen2-1.5B-Instruct,,67.3469387755102,65.98639455782312,
8
- Qwen2-7B-Instruct,80.95238095238095,,80.27210884353741,82.31292517006803
 
1
+ name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
+ Gpt-4,85.71428571428571,85.71428571428571,87.75510204081633,87.75510204081633,90.47619047619048,90.47619047619048,91.15646258503402,91.15646258503402
3
+ GPT-4o,89.79591836734694,89.79591836734694,90.47619047619048,90.47619047619048,91.15646258503402,91.15646258503402,92.51700680272108,92.51700680272108
4
+ Baichuan2-7B-Chat,44.89795918367347,44.89795918367347,66.66666666666666,66.66666666666666,28.57142857142857,28.57142857142857,50.34013605442177,50.34013605442177
5
+ Claude-3-Opus,87.75510204081633,87.75510204081633,89.1156462585034,89.1156462585034,91.15646258503402,91.15646258503402,88.43537414965986,88.43537414965986
6
+ Qwen2-0.5B-Instruct,,,53.06122448979592,53.06122448979592,,,52.38095238095239,52.38095238095239
7
+ Qwen2-1.5B-Instruct,,,67.3469387755102,67.3469387755102,65.98639455782312,65.98639455782312,,
8
+ Qwen2-7B-Instruct,80.95238095238095,80.95238095238095,,,80.27210884353741,80.27210884353741,82.31292517006803,82.31292517006803
data_v2/inspur_zh_mc_gen.csv CHANGED
@@ -1,8 +1,8 @@
1
- name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
2
- Gpt-4,,87.07482993197279,87.07482993197279,91.15646258503402
3
- GPT-4o,87.07482993197279,89.1156462585034,89.1156462585034,91.15646258503402
4
- Baichuan2-7B-Chat,62.585034013605444,,42.857142857142854,
5
- Claude-3-Opus,83.6734693877551,85.03401360544217,87.75510204081633,91.83673469387756
6
- Qwen2-0.5B-Instruct,56.4625850340136,,,57.14285714285714
7
- Qwen2-1.5B-Instruct,,68.02721088435374,,
8
- Qwen2-7B-Instruct,76.19047619047619,80.95238095238095,76.87074829931973,
 
1
+ name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
+ Gpt-4,,,87.07482993197279,87.07482993197279,87.07482993197279,87.07482993197279,91.15646258503402,91.15646258503402
3
+ GPT-4o,87.07482993197279,87.07482993197279,89.1156462585034,89.1156462585034,89.1156462585034,89.1156462585034,91.15646258503402,91.15646258503402
4
+ Baichuan2-7B-Chat,62.585034013605444,62.585034013605444,,,42.857142857142854,42.857142857142854,,
5
+ Claude-3-Opus,83.6734693877551,83.6734693877551,85.03401360544217,85.03401360544217,87.75510204081633,87.75510204081633,91.83673469387756,91.83673469387756
6
+ Qwen2-0.5B-Instruct,56.4625850340136,56.4625850340136,,,,,57.14285714285714,57.14285714285714
7
+ Qwen2-1.5B-Instruct,,,68.02721088435374,68.02721088435374,,,,
8
+ Qwen2-7B-Instruct,76.19047619047619,76.19047619047619,80.95238095238095,80.95238095238095,76.87074829931973,76.87074829931973,,
data_v2/lenovo_zh_mc_gen.csv CHANGED
@@ -1,16 +1,16 @@
1
- name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
- Baichuan2-13B-Chat,65.0,60.0,72.5,67.5,62.5,60.0,70.0,67.5
3
- Chatglm3-6B,60.0,60.0,60.0,60.0,55.0,55.0,60.0,60.0
4
- Devops-Model-14B-Chat,60.0,67.5,65.0,57.5,67.5,70.0,62.5,70.0
5
- Ernie-Bot-4.0,75.0,75.0,77.5,77.5,75.0,75.0,82.5,82.5
6
- Gpt-3.5-Turbo,60.0,62.5,65.0,70.0,57.5,57.5,62.5,62.5
7
- GPT-4,77.5,77.5,82.5,82.5,77.5,77.5,82.5,82.5
8
- Llama-2-13B,45.0,45.0,62.5,62.5,60.0,60.0,55.0,55.0
9
- Llama-2-70B-Chat,22.5,22.5,75.0,75.0,20.0,20.0,57.5,57.5
10
- Llama-2-7B,32.5,32.5,45.0,45.0,60.0,60.0,55.0,55.0
11
- Mistral-7B,47.5,47.5,62.5,62.5,35.0,35.0,60.0,60.0
12
- Qwen-14B-Chat,70.0,67.5,70.0,67.5,70.0,65.0,65.0,67.5
13
- Qwen-72B-Chat,72.5,72.5,75.0,75.0,75.0,75.0,75.0,75.0
14
- Yi-34B-Chat,75.0,75.0,87.5,82.5,62.5,57.5,52.5,52.5
15
- Claude-3-Opus,,71.42857142857143,,,,,,
16
- Meta-Llama-3-8B-Instruct,,47.14285714285714,,44.285714285714285,,45.714285714285715,,32.857142857142854
 
1
+ name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
2
+ Baichuan2-13B-Chat,60.0,67.5,60.0,67.5
3
+ Chatglm3-6B,60.0,60.0,55.0,60.0
4
+ Devops-Model-14B-Chat,67.5,57.5,70.0,70.0
5
+ Ernie-Bot-4.0,75.0,77.5,75.0,82.5
6
+ Gpt-3.5-Turbo,62.5,70.0,57.5,62.5
7
+ GPT-4,77.5,82.5,77.5,82.5
8
+ Llama-2-13B,45.0,62.5,60.0,55.0
9
+ Llama-2-70B-Chat,22.5,75.0,20.0,57.5
10
+ Llama-2-7B,32.5,45.0,60.0,55.0
11
+ Mistral-7B,47.5,62.5,35.0,60.0
12
+ Qwen-14B-Chat,67.5,67.5,65.0,67.5
13
+ Qwen-72B-Chat,72.5,75.0,75.0,75.0
14
+ Yi-34B-Chat,75.0,82.5,57.5,52.5
15
+ Claude-3-Opus,71.42857142857143,,,
16
+ Meta-Llama-3-8B-Instruct,47.14285714285714,44.285714285714285,45.714285714285715,32.857142857142854
data_v2/network_en_mc_gen.csv CHANGED
@@ -1,29 +1,29 @@
1
- name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
- Aquilachat2-34B,36.63,36.63,44.83,44.83,46.65,46.65,,
3
- Baichuan-13B-Chat,18.3,20.4,28.6,37.0,24.1,26.7,18.2,17.8
4
- Baichuan2-13B-Chat,14.1,15.3,24.1,25.8,32.3,33.1,25.6,27.7
5
- Chatglm2-6B,24.8,24.7,36.6,36.5,37.6,37.6,40.5,40.5
6
- Chatglm3-6B,43.38487973,43.38487973,44.58762887,44.58762887,42.09621993,42.09621993,43.47079038,43.47079038
7
- Chinese-Alpaca-2-13B,37.7,37.7,49.7,49.7,48.6,48.6,50.5,50.5
8
- Chinese-Llama-2-13B,29.4,29.4,37.8,37.8,40.4,40.4,28.8,28.8
9
- Devops-Model-14B-Chat,30.69,30.59,55.77,63.63,63.85,61.96,41.15,44.01
10
- Ernie-Bot-4.0,61.15,61.15,70.0,70.0,60.0,60.0,70.0,70.0
11
- Gpt-3.5-Turbo,66.6,66.8,69.6,72.0,68.3,68.3,70.9,72.5
12
- Gpt-4,,,,,,,88.7,88.7
13
- Internlm-7B,38.7,38.7,43.9,43.9,45.2,45.2,51.4,51.4
14
- Internlm2-Chat-20B,56.35738832,56.35738832,26.18025751,26.18025751,60.48109966,60.48109966,45.10309278,45.10309278
15
- Internlm2-Chat-7B,49.74226804,49.74226804,56.18556701,56.18556701,48.19587629,48.19587629,49.74226804,49.74226804
16
- Llama-2-13B,41.8,46.5,53.1,58.7,53.3,53.0,56.8,61.0
17
- Llama-2-70B-Chat,25.29,25.29,57.97,58.06,52.97,52.97,58.55,58.55
18
- Llama-2-7B,39.5,40.0,45.4,49.5,48.2,46.8,52.0,55.2
19
- Mistral-7B,29.27,29.27,46.3,46.3,47.22,47.22,45.58,45.58
20
- Qwen-14B-Chat,43.78,47.81,56.58,59.4,62.09,59.7,49.06,55.88
21
- Qwen-72B-Chat,70.41,70.5,72.38,72.56,70.32,70.32,70.13,70.22
22
- Qwen-7B-Chat,45.9,46.0,47.3,50.1,52.1,51.0,48.3,49.8
23
- Yi-34B-Chat,57.75,59.14,65.11,68.79,68.16,68.37,78.09,80.06
24
- Claude-3-Opus,,69.03417341637355,,,,,,
25
- gemma_2b,26.46048,26.46048,33.41924,33.41924,26.6323,26.6323,37.54296,37.54296
26
- gemma_7b,25.08591,25.08591,50.85911,50.85911,30.24055,30.24055,51.55747,51.55747
27
- Meta-Llama-3-8B-Instruct,,38.279481659390655,,76.69172932330827,,23.734458771084668,,33.241749376506874
28
- Qwen1.5-14B-Base,34.87973,34.87973,60.82474,60.82474,65.54983,65.54983,47.07904,47.07904
29
- Qwen1.5-14B-Chat,54.89691,56.4433,64.08935,67.09622,52.23368,53.52234,59.53608,64.17526
 
1
+ name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
2
+ Aquilachat2-34B,36.63,44.83,46.65,
3
+ Baichuan-13B-Chat,20.4,37.0,26.7,17.8
4
+ Baichuan2-13B-Chat,15.3,25.8,33.1,27.7
5
+ Chatglm2-6B,24.7,36.5,37.6,40.5
6
+ Chatglm3-6B,43.38487973,44.58762887,42.09621993,43.47079038
7
+ Chinese-Alpaca-2-13B,37.7,49.7,48.6,50.5
8
+ Chinese-Llama-2-13B,29.4,37.8,40.4,28.8
9
+ Devops-Model-14B-Chat,30.59,63.63,61.96,44.01
10
+ Ernie-Bot-4.0,61.15,70.0,60.0,70.0
11
+ Gpt-3.5-Turbo,66.8,72.0,68.3,72.5
12
+ Gpt-4,,,,88.7
13
+ Internlm-7B,38.7,43.9,45.2,51.4
14
+ Internlm2-Chat-20B,56.35738832,26.18025751,60.48109966,45.10309278
15
+ Internlm2-Chat-7B,49.74226804,56.18556701,48.19587629,49.74226804
16
+ Llama-2-13B,46.5,58.7,53.0,61.0
17
+ Llama-2-70B-Chat,25.29,58.06,52.97,58.55
18
+ Llama-2-7B,40.0,49.5,46.8,55.2
19
+ Mistral-7B,29.27,46.3,47.22,45.58
20
+ Qwen-14B-Chat,47.81,59.4,59.7,55.88
21
+ Qwen-72B-Chat,70.5,72.56,70.32,70.22
22
+ Qwen-7B-Chat,46.0,50.1,51.0,49.8
23
+ Yi-34B-Chat,59.14,68.79,68.37,80.06
24
+ Claude-3-Opus,69.03417341637355,,,
25
+ gemma_2b,26.46048,33.41924,26.6323,37.54296
26
+ gemma_7b,25.08591,50.85911,30.24055,51.55747
27
+ Meta-Llama-3-8B-Instruct,38.279481659390655,76.69172932330827,23.734458771084668,33.241749376506874
28
+ Qwen1.5-14B-Base,34.87973,60.82474,65.54983,47.07904
29
+ Qwen1.5-14B-Chat,56.4433,67.09622,53.52234,64.17526
data_v2/network_zh_mc_gen.csv CHANGED
@@ -1,32 +1,32 @@
1
- name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
- Aquilachat2-34B,34.66,34.66,47.74,47.74,44.48,44.48,,
3
- Baichuan-13B-Chat,15.2,16.0,43.9,49.7,34.3,36.1,51.3,55.6
4
- Baichuan2-13B-Chat,35.6,35.9,30.5,30.5,34.6,35.6,30.2,32.0
5
- Chatglm2-6B,33.8,33.7,42.1,42.2,36.0,36.0,39.5,39.5
6
- Chatglm3-6B,41.39414802,41.39414802,49.22547332,49.22547332,38.81239243,38.81239243,42.85714286,42.85714286
7
- Chinese-Alpaca-2-13B,33.1,33.1,44.2,44.2,44.0,44.0,42.7,42.7
8
- Chinese-Llama-2-13B,22.5,22.5,38.8,38.8,41.8,41.8,32.2,32.2
9
- Devops-Model-14B-Chat,47.59,46.57,52.52,56.01,62.07,60.08,50.59,55.79
10
- Ernie-Bot-4.0,67.54,67.54,71.96,71.96,72.0,72.0,78.0,78.0
11
- Glm3-Turbo,59.63855422,59.63855422,,,,,,
12
- Glm4,67.383821,67.383821,,,,,,
13
- Gpt-3.5-Turbo,58.4,58.6,64.8,67.6,59.2,59.7,65.2,67.4
14
- Gpt-4,,,,,,,86.0,86.0
15
- Hunyuan-13B,60.0,60.0,70.0,70.0,,,,
16
- Internlm-7B,41.7,41.7,38.4,38.4,42.6,42.6,41.3,41.3
17
- Internlm2-Chat-20B,57.48709122,57.48709122,57.14285714,57.14285714,59.1222031,59.1222031,50.77452668,50.77452668
18
- Internlm2-Chat-7B,54.30292599,54.30292599,59.81067126,59.81067126,58.51979346,58.51979346,51.63511188,51.63511188
19
- Llama-2-13B,29.7,31.6,51.6,57.0,39.6,38.9,48.0,50.6
20
- Llama-2-70B-Chat,38.55,38.55,57.49,57.49,49.09,49.09,48.57,48.57
21
- Llama-2-7B,29.8,30.2,50.1,55.6,38.6,40.8,45.6,50.4
22
- Mistral-7B,1.9,1.9,45.61,45.61,15.0,15.0,35.97,35.97
23
- Qwen-14B-Chat,48.35,48.81,55.35,57.4,58.53,56.12,52.12,54.99
24
- Qwen-72B-Chat,65.77,65.86,68.13,68.3,69.4,69.4,69.99,70.08
25
- Qwen-7B-Chat,29.6,29.9,50.6,53.5,50.4,46.9,46.9,47.7
26
- Yi-34B-Chat,61.61,62.56,68.11,69.75,65.73,65.37,69.88,71.21
27
- Claude-3-Opus,,62.329525111479995,,,,,,
28
- gemma_2b,29.69019,29.69019,39.15663,39.15663,29.77625,29.77625,38.64028,38.64028
29
- gemma_7b,31.58348,31.58348,47.59036,47.59036,34.68158,34.68158,48.88124,48.88124
30
- Meta-Llama-3-8B-Instruct,,35.904696806952444,,38.94801939914722,,41.717931191615406,,31.059792337987826
31
- Qwen1.5-14B-Base,45.18072,45.18072,59.1222,59.1222,61.10155,61.10155,52.4957,52.4957
32
- Qwen1.5-14B-Chat,54.04475,53.87263,62.56454,63.85542,58.77797,58.0895,63.42513,65.57659
 
1
+ name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
2
+ Aquilachat2-34B,34.66,47.74,44.48,
3
+ Baichuan-13B-Chat,16.0,49.7,36.1,55.6
4
+ Baichuan2-13B-Chat,35.9,30.5,35.6,32.0
5
+ Chatglm2-6B,33.7,42.2,36.0,39.5
6
+ Chatglm3-6B,41.39414802,49.22547332,38.81239243,42.85714286
7
+ Chinese-Alpaca-2-13B,33.1,44.2,44.0,42.7
8
+ Chinese-Llama-2-13B,22.5,38.8,41.8,32.2
9
+ Devops-Model-14B-Chat,46.57,56.01,60.08,55.79
10
+ Ernie-Bot-4.0,67.54,71.96,72.0,78.0
11
+ Glm3-Turbo,59.63855422,,,
12
+ Glm4,67.383821,,,
13
+ Gpt-3.5-Turbo,58.6,67.6,59.7,67.4
14
+ Gpt-4,,,,86.0
15
+ Hunyuan-13B,60.0,70.0,,
16
+ Internlm-7B,41.7,38.4,42.6,41.3
17
+ Internlm2-Chat-20B,57.48709122,57.14285714,59.1222031,50.77452668
18
+ Internlm2-Chat-7B,54.30292599,59.81067126,58.51979346,51.63511188
19
+ Llama-2-13B,31.6,57.0,38.9,50.6
20
+ Llama-2-70B-Chat,38.55,57.49,49.09,48.57
21
+ Llama-2-7B,30.2,55.6,40.8,50.4
22
+ Mistral-7B,1.9,45.61,15.0,35.97
23
+ Qwen-14B-Chat,48.81,57.4,56.12,54.99
24
+ Qwen-72B-Chat,65.86,68.3,69.4,70.08
25
+ Qwen-7B-Chat,29.9,53.5,46.9,47.7
26
+ Yi-34B-Chat,62.56,69.75,65.37,71.21
27
+ Claude-3-Opus,62.329525111479995,,,
28
+ gemma_2b,29.69019,39.15663,29.77625,38.64028
29
+ gemma_7b,31.58348,47.59036,34.68158,48.88124
30
+ Meta-Llama-3-8B-Instruct,35.904696806952444,38.94801939914722,41.717931191615406,31.059792337987826
31
+ Qwen1.5-14B-Base,45.18072,59.1222,61.10155,52.4957
32
+ Qwen1.5-14B-Chat,53.87263,63.85542,58.0895,65.57659
data_v2/oracle_en_mc_gen.csv CHANGED
@@ -1,29 +1,29 @@
1
- name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
- Aquilachat2-34B,36.63,36.63,44.83,44.83,46.65,46.65,,
3
- Baichuan-13B-Chat,12.47,11.67,16.5,19.52,24.55,22.54,26.36,28.77
4
- Baichuan2-13B-Chat,17.1,19.1,18.7,22.9,25.9,26.5,20.9,24.5
5
- Chatglm2-6B,20.72,20.52,19.92,19.72,20.12,20.12,22.94,22.74
6
- Chatglm3-6B,20.92555332,20.92555332,25.15090543,25.15090543,24.74849095,24.74849095,29.1750503,29.1750503
7
- Chinese-Alpaca-2-13B,23.14,23.14,28.97,28.97,16.3,16.3,14.29,14.29
8
- Chinese-Llama-2-13B,13.88,13.88,20.52,20.52,16.9,16.9,23.34,23.34
9
- Devops-Model-14B-Chat,25.15,26.96,35.41,38.83,33.2,34.81,27.36,27.36
10
- Ernie-Bot-4.0,43.8,43.8,47.14,47.14,46.0,46.0,54.0,54.0
11
- Gpt-3.5-Turbo,38.63,38.83,40.04,42.05,36.62,37.63,42.66,43.86
12
- Gpt-4,,,59.02,64.56,,,58.35,62.58
13
- Internlm-7B,26.36,26.36,25.55,25.55,25.55,25.55,27.97,27.97
14
- Internlm2-Chat-20B,,,59.21052632,59.21052632,,,,
15
- Internlm2-Chat-7B,27.16297787,27.16297787,28.16901408,28.16901408,29.97987928,29.97987928,30.18108652,30.18108652
16
- Llama-2-13B,16.1,20.32,23.94,29.58,20.12,22.33,24.35,33.8
17
- Llama-2-70B-Chat,19.72,19.72,27.97,27.97,26.56,26.56,32.6,32.6
18
- Llama-2-7B,22.13,23.74,23.74,26.56,19.32,20.52,28.77,33.6
19
- Mistral-7B,17.1,17.1,26.76,26.76,31.19,31.19,27.97,27.97
20
- Qwen-14B-Chat,24.95,28.37,33.0,36.62,27.97,28.37,27.97,24.14
21
- Qwen-72B-Chat,47.28,47.48,48.09,48.09,49.7,49.7,43.46,43.66
22
- Qwen-7B-Chat,18.91,19.11,22.13,23.94,26.76,25.55,34.81,33.4
23
- Yi-34B-Chat,47.08,48.69,47.08,46.28,58.15,58.35,56.94,58.95
24
- Claude-3-Opus,,48.31816996021653,,,,,,
25
- gemma_2b,16.90141,16.90141,19.5171,19.5171,16.09658,16.09658,24.74849,24.74849
26
- gemma_7b,14.28571,14.28571,30.98592,30.98592,2.60223,2.60223,43.85965,43.85965
27
- Meta-Llama-3-8B-Instruct,,28.468825409248026,,40.47805387073632,,23.33528989760647,,34.6197743429205
28
- Qwen1.5-14B-Base,29.17505,29.17505,33.60161,33.60161,36.82093,36.82093,27.7666,27.7666
29
- Qwen1.5-14B-Chat,32.79678,35.41247,39.43662,43.05835,32.39437,33.60161,36.82093,38.833
 
1
+ name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
2
+ Aquilachat2-34B,36.63,44.83,46.65,
3
+ Baichuan-13B-Chat,11.67,19.52,22.54,28.77
4
+ Baichuan2-13B-Chat,19.1,22.9,26.5,24.5
5
+ Chatglm2-6B,20.52,19.72,20.12,22.74
6
+ Chatglm3-6B,20.92555332,25.15090543,24.74849095,29.1750503
7
+ Chinese-Alpaca-2-13B,23.14,28.97,16.3,14.29
8
+ Chinese-Llama-2-13B,13.88,20.52,16.9,23.34
9
+ Devops-Model-14B-Chat,26.96,38.83,34.81,27.36
10
+ Ernie-Bot-4.0,43.8,47.14,46.0,54.0
11
+ Gpt-3.5-Turbo,38.83,42.05,37.63,43.86
12
+ Gpt-4,,64.56,,62.58
13
+ Internlm-7B,26.36,25.55,25.55,27.97
14
+ Internlm2-Chat-20B,,59.21052632,,
15
+ Internlm2-Chat-7B,27.16297787,28.16901408,29.97987928,30.18108652
16
+ Llama-2-13B,20.32,29.58,22.33,33.8
17
+ Llama-2-70B-Chat,19.72,27.97,26.56,32.6
18
+ Llama-2-7B,23.74,26.56,20.52,33.6
19
+ Mistral-7B,17.1,26.76,31.19,27.97
20
+ Qwen-14B-Chat,28.37,36.62,28.37,24.14
21
+ Qwen-72B-Chat,47.48,48.09,49.7,43.66
22
+ Qwen-7B-Chat,19.11,23.94,25.55,33.4
23
+ Yi-34B-Chat,48.69,46.28,58.35,58.95
24
+ Claude-3-Opus,48.31816996021653,,,
25
+ gemma_2b,16.90141,19.5171,16.09658,24.74849
26
+ gemma_7b,14.28571,30.98592,2.60223,43.85965
27
+ Meta-Llama-3-8B-Instruct,28.468825409248026,40.47805387073632,23.33528989760647,34.6197743429205
28
+ Qwen1.5-14B-Base,29.17505,33.60161,36.82093,27.7666
29
+ Qwen1.5-14B-Chat,35.41247,43.05835,33.60161,38.833
data_v2/oracle_zh_mc_gen.csv CHANGED
@@ -1,28 +1,28 @@
1
- name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
- Aquilachat2-34B,34.66,34.66,47.74,47.74,44.48,44.48,,
3
- Baichuan-13B-Chat,12.88,12.07,25.96,27.57,18.91,19.52,27.97,30.58
4
- Baichuan2-13B-Chat,25.7,25.5,20.1,21.3,27.7,26.7,22.7,24.7
5
- Chatglm2-6B,23.34,23.34,24.35,24.14,22.94,22.94,26.16,26.16
6
- Chatglm3-6B,21.32796781,21.32796781,28.97384306,28.97384306,21.73038229,21.73038229,29.57746479,29.57746479
7
- Chinese-Alpaca-2-13B,22.94,22.94,25.75,25.75,25.15,25.15,22.33,22.33
8
- Chinese-Llama-2-13B,14.69,14.69,19.92,19.92,19.72,19.72,20.93,20.93
9
- Devops-Model-14B-Chat,24.75,22.74,28.37,27.77,36.62,37.02,27.57,26.36
10
- Ernie-Bot-4.0,48.56,48.56,50.64,50.64,48.0,48.0,54.0,54.0
11
- Gpt-3.5-Turbo,36.42,35.81,39.24,43.26,39.84,39.44,27.16,27.77
12
- Gpt-4,,,59.38,65.17,,,44.06,48.09
13
- Internlm-7B,25.96,25.96,25.96,25.96,29.18,29.18,28.37,28.37
14
- Internlm2-Chat-7B,28.57142857,28.57142857,31.79074447,31.79074447,30.78470825,30.78470825,31.18712274,31.18712274
15
- Llama-2-13B,23.94,24.35,29.58,31.99,24.55,26.76,21.13,20.72
16
- Llama-2-70B-Chat,15.29,15.29,34.81,34.81,26.76,26.76,33.8,33.8
17
- Llama-2-7B,20.72,20.72,27.16,27.97,21.53,18.51,18.31,17.91
18
- Mistral-7B,1.9,1.9,45.61,45.61,15.0,15.0,35.97,35.97
19
- Qwen-14B-Chat,27.57,27.57,32.39,36.02,40.04,35.41,30.38,33.4
20
- Qwen-72B-Chat,48.29,48.49,49.5,49.7,49.7,49.7,45.27,44.87
21
- Qwen-7B-Chat,18.51,17.71,27.36,28.37,29.78,29.58,33.6,31.79
22
- Yi-34B-Chat,49.9,49.3,52.72,53.72,56.34,56.34,51.31,54.33
23
- Claude-3-Opus,,50.00570664579664,,,,,,
24
- gemma_2b,18.51107,18.51107,24.9497,24.9497,21.52918,21.52918,27.7666,27.7666
25
- gemma_7b,19.3159,19.3159,53.94737,53.94737,18.51107,18.51107,5.204461,5.204461
26
- Meta-Llama-3-8B-Instruct,,33.91785690993282,,27.773429857170807,,41.359323028761494,,32.62733972477663
27
- Qwen1.5-14B-Base,20.92555,20.92555,35.61368,35.61368,41.44869,41.44869,30.78471,30.78471
28
- Qwen1.5-14B-Chat,24.14487,23.34004,40.64386,41.04628,38.22938,38.02817,39.43662,40.04024
 
1
+ name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
2
+ Aquilachat2-34B,34.66,47.74,44.48,
3
+ Baichuan-13B-Chat,12.07,27.57,19.52,30.58
4
+ Baichuan2-13B-Chat,25.5,21.3,26.7,24.7
5
+ Chatglm2-6B,23.34,24.14,22.94,26.16
6
+ Chatglm3-6B,21.32796781,28.97384306,21.73038229,29.57746479
7
+ Chinese-Alpaca-2-13B,22.94,25.75,25.15,22.33
8
+ Chinese-Llama-2-13B,14.69,19.92,19.72,20.93
9
+ Devops-Model-14B-Chat,22.74,27.77,37.02,26.36
10
+ Ernie-Bot-4.0,48.56,50.64,48.0,54.0
11
+ Gpt-3.5-Turbo,35.81,43.26,39.44,27.77
12
+ Gpt-4,,65.17,,48.09
13
+ Internlm-7B,25.96,25.96,29.18,28.37
14
+ Internlm2-Chat-7B,28.57142857,31.79074447,30.78470825,31.18712274
15
+ Llama-2-13B,24.35,31.99,26.76,20.72
16
+ Llama-2-70B-Chat,15.29,34.81,26.76,33.8
17
+ Llama-2-7B,20.72,27.97,18.51,17.91
18
+ Mistral-7B,1.9,45.61,15.0,35.97
19
+ Qwen-14B-Chat,27.57,36.02,35.41,33.4
20
+ Qwen-72B-Chat,48.49,49.7,49.7,44.87
21
+ Qwen-7B-Chat,17.71,28.37,29.58,31.79
22
+ Yi-34B-Chat,49.3,53.72,56.34,54.33
23
+ Claude-3-Opus,50.00570664579664,,,
24
+ gemma_2b,18.51107,24.9497,21.52918,27.7666
25
+ gemma_7b,19.3159,53.94737,18.51107,5.204461
26
+ Meta-Llama-3-8B-Instruct,33.91785690993282,27.773429857170807,41.359323028761494,32.62733972477663
27
+ Qwen1.5-14B-Base,20.92555,35.61368,41.44869,30.78471
28
+ Qwen1.5-14B-Chat,23.34004,41.04628,38.02817,40.04024
data_v2/pufa_zh_mc_gen.csv CHANGED
@@ -1,22 +1,22 @@
1
- name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
- Baichuan2-13B-Chat,65.33,66.67,66.67,66.67,62.67,61.33,62.67,62.67
3
- Chatglm3-6B,60.0,60.0,61.33333333,61.33333333,56.0,56.0,58.66666667,58.66666667
4
- Devops-Model-14B-Chat,29.33,29.33,62.67,61.33,82.67,81.33,53.33,70.67
5
- Ernie-Bot-4.0,86.67,86.67,86.67,86.67,82.67,82.67,86.67,86.67
6
- Gpt-3.5-Turbo,77.33,77.33,84.0,81.33,76.0,78.67,84.0,82.67
7
- GPT-4,88.0,88.0,86.67,86.67,84.0,84.0,90.67,90.67
8
- Internlm2-Chat-20B,76.0,76.0,80.0,80.0,80.0,80.0,,
9
- Internlm2-Chat-7B,78.66666667,78.66666667,72.0,72.0,72.0,72.0,53.33333333,53.33333333
10
- Llama-2-13B,44.0,44.0,68.0,68.0,61.33,61.33,53.33,53.33
11
- Llama-2-70B-Chat,6.67,6.67,65.33,65.33,49.33,49.33,66.67,66.67
12
- Llama-2-7B,25.33,25.33,40.0,40.0,48.0,48.0,52.0,52.0
13
- Mistral-7B,4.0,4.0,58.67,58.67,22.67,22.67,54.67,54.67
14
- Qwen-14B-Chat,73.33,73.33,69.33,72.0,73.33,73.33,72.0,80.0
15
- Qwen-72B-Chat,90.67,90.67,85.33,85.33,88.0,88.0,82.67,82.67
16
- Yi-34B-Chat,84.0,84.0,88.0,88.0,90.67,92.0,78.67,89.33
17
- Claude-3-Opus,,93.24324324324324,,,,,,
18
- gemma_2b,36.0,36.0,41.33333,41.33333,36.0,36.0,30.66667,30.66667
19
- gemma_7b,34.66667,34.66667,56.0,56.0,46.66667,46.66667,56.0,56.0
20
- Meta-Llama-3-8B-Instruct,,85.8108108108108,,31.756756756756754,,83.1081081081081,,27.7027027027027
21
- Qwen1.5-14B-Base,78.66667,78.66667,72.0,72.0,92.0,92.0,42.66667,42.66667
22
- Qwen1.5-14B-Chat,86.66667,89.33333,85.33333,85.33333,78.66667,80.0,86.66667,85.33333
 
1
+ name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
2
+ Baichuan2-13B-Chat,66.67,66.67,61.33,62.67
3
+ Chatglm3-6B,60.0,61.33333333,56.0,58.66666667
4
+ Devops-Model-14B-Chat,29.33,61.33,81.33,70.67
5
+ Ernie-Bot-4.0,86.67,86.67,82.67,86.67
6
+ Gpt-3.5-Turbo,77.33,81.33,78.67,82.67
7
+ GPT-4,88.0,86.67,84.0,90.67
8
+ Internlm2-Chat-20B,76.0,80.0,80.0,
9
+ Internlm2-Chat-7B,78.66666667,72.0,72.0,53.33333333
10
+ Llama-2-13B,44.0,68.0,61.33,53.33
11
+ Llama-2-70B-Chat,6.67,65.33,49.33,66.67
12
+ Llama-2-7B,25.33,40.0,48.0,52.0
13
+ Mistral-7B,4.0,58.67,22.67,54.67
14
+ Qwen-14B-Chat,73.33,72.0,73.33,80.0
15
+ Qwen-72B-Chat,90.67,85.33,88.0,82.67
16
+ Yi-34B-Chat,84.0,88.0,92.0,89.33
17
+ Claude-3-Opus,93.24324324324324,,,
18
+ gemma_2b,36.0,41.33333,36.0,30.66667
19
+ gemma_7b,34.66667,56.0,46.66667,56.0
20
+ Meta-Llama-3-8B-Instruct,85.8108108108108,31.756756756756754,83.1081081081081,27.7027027027027
21
+ Qwen1.5-14B-Base,78.66667,72.0,92.0,42.66667
22
+ Qwen1.5-14B-Chat,89.33333,85.33333,80.0,85.33333
data_v2/rzy_zh_mc_gen.csv CHANGED
@@ -1,22 +1,22 @@
1
- name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
- Baichuan2-13B-Chat,60.17,60.17,62.79,67.5,59.06,59.34,64.45,64.32
3
- Chatglm3-6B,54.21853389,54.21853389,62.10235131,62.10235131,55.32503458,55.32503458,59.33609959,59.33609959
4
- Devops-Model-14B-Chat,53.67,56.85,54.5,59.2,65.28,64.18,55.19,61.83
5
- Ernie-Bot-4.0,76.0,76.0,79.0,79.0,73.0,73.0,77.0,77.0
6
- Gpt-3.5-Turbo,65.28,65.42,66.39,67.5,65.28,66.25,68.05,68.74
7
- GPT-4,65.56,65.56,68.05,68.05,65.28,65.28,68.19,68.19
8
- Internlm2-Chat-20B,63.90041494,63.90041494,64.03872752,64.03872752,,,,
9
- Internlm2-Chat-7B,61.2724758,61.2724758,63.62378976,63.62378976,65.00691563,65.00691563,54.21853389,54.21853389
10
- Llama-2-13B,51.18,51.18,59.06,59.06,57.12,57.12,53.39,53.39
11
- Llama-2-70B-Chat,5.26,5.26,62.52,62.52,48.82,48.82,59.75,59.75
12
- Llama-2-7B,34.85,34.85,44.95,44.95,46.2,46.2,53.39,53.39
13
- Mistral-7B,18.53,18.53,60.3,60.3,29.88,29.88,59.75,59.75
14
- Qwen-14B-Chat,61.96,61.55,61.55,64.45,65.28,63.49,62.93,65.98
15
- Qwen-72B-Chat,66.67,66.67,65.28,65.28,65.98,65.98,70.12,70.12
16
- Yi-34B-Chat,64.45,64.59,67.77,67.36,60.17,60.03,57.68,57.54
17
- Claude-3-Opus,,67.64288271089369,,,,,,
18
- gemma_2b,36.37621,36.37621,45.22822,45.22822,33.60996,33.60996,37.75934,37.75934
19
- gemma_7b,39.41909,39.41909,54.77178,54.77178,42.04703,42.04703,56.70816,56.70816
20
- Meta-Llama-3-8B-Instruct,,36.55172413793103,,28.27586206896552,,38.62068965517241,,34.48275862068966
21
- Qwen1.5-14B-Base,51.17566,51.17566,62.6556,62.6556,65.42185,65.42185,50.89903,50.89903
22
- Qwen1.5-14B-Chat,62.93223,64.03873,64.59198,64.31535,63.34716,63.7621,65.42185,65.9751
 
1
+ name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
2
+ Baichuan2-13B-Chat,60.17,67.5,59.34,64.32
3
+ Chatglm3-6B,54.21853389,62.10235131,55.32503458,59.33609959
4
+ Devops-Model-14B-Chat,56.85,59.2,64.18,61.83
5
+ Ernie-Bot-4.0,76.0,79.0,73.0,77.0
6
+ Gpt-3.5-Turbo,65.42,67.5,66.25,68.74
7
+ GPT-4,65.56,68.05,65.28,68.19
8
+ Internlm2-Chat-20B,63.90041494,64.03872752,,
9
+ Internlm2-Chat-7B,61.2724758,63.62378976,65.00691563,54.21853389
10
+ Llama-2-13B,51.18,59.06,57.12,53.39
11
+ Llama-2-70B-Chat,5.26,62.52,48.82,59.75
12
+ Llama-2-7B,34.85,44.95,46.2,53.39
13
+ Mistral-7B,18.53,60.3,29.88,59.75
14
+ Qwen-14B-Chat,61.55,64.45,63.49,65.98
15
+ Qwen-72B-Chat,66.67,65.28,65.98,70.12
16
+ Yi-34B-Chat,64.59,67.36,60.03,57.54
17
+ Claude-3-Opus,67.64288271089369,,,
18
+ gemma_2b,36.37621,45.22822,33.60996,37.75934
19
+ gemma_7b,39.41909,54.77178,42.04703,56.70816
20
+ Meta-Llama-3-8B-Instruct,36.55172413793103,28.27586206896552,38.62068965517241,34.48275862068966
21
+ Qwen1.5-14B-Base,51.17566,62.6556,65.42185,50.89903
22
+ Qwen1.5-14B-Chat,64.03873,64.31535,63.7621,65.9751
data_v2/zabbix_zh_mc_gen.csv CHANGED
@@ -1,22 +1,22 @@
1
- name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
- Baichuan2-13B-Chat,31.0,29.0,47.0,47.0,29.0,27.0,40.0,43.0
3
- Chatglm3-6B,29.0,29.0,36.0,36.0,29.0,29.0,34.0,34.0
4
- Devops-Model-14B-Chat,27.0,28.0,36.0,33.0,46.0,44.0,44.0,46.0
5
- Ernie-Bot-4.0,44.0,44.0,48.0,48.0,47.0,47.0,51.0,51.0
6
- Gpt-3.5-Turbo,36.0,36.0,42.0,42.0,40.0,40.0,48.0,48.0
7
- GPT-4,51.0,51.0,53.0,53.0,60.0,60.0,59.0,59.0
8
- Internlm2-Chat-20B,41.0,41.0,,,44.0,44.0,,
9
- Internlm2-Chat-7B,43.0,43.0,39.0,39.0,45.0,45.0,35.0,35.0
10
- Llama-2-13B,28.0,28.0,45.0,45.0,40.0,40.0,43.0,43.0
11
- Llama-2-70B-Chat,1.0,1.0,47.0,47.0,29.0,29.0,46.0,46.0
12
- Llama-2-7B,18.0,18.0,35.0,35.0,22.0,22.0,28.0,28.0
13
- Mistral-7B,6.0,6.0,42.0,42.0,11.0,11.0,44.0,44.0
14
- Qwen-14B-Chat,36.0,36.0,39.0,41.0,44.0,40.0,47.0,43.0
15
- Qwen-72B-Chat,46.0,46.0,44.0,44.0,45.0,45.0,61.0,61.0
16
- Yi-34B-Chat,40.0,40.0,40.0,40.0,42.0,42.0,42.0,42.0
17
- Claude-3-Opus,,61.71875,,,,,,
18
- gemma_2b,25.0,25.0,32.0,32.0,24.0,24.0,30.0,30.0
19
- gemma_7b,22.0,22.0,44.0,44.0,28.0,28.0,40.0,40.0
20
- Meta-Llama-3-8B-Instruct,,39.670138888888886,,37.58680555555556,,30.381944444444443,,33.072916666666664
21
- Qwen1.5-14B-Base,38.0,38.0,39.0,39.0,48.0,48.0,36.0,36.0
22
- Qwen1.5-14B-Chat,34.0,34.0,45.0,43.0,42.0,39.0,48.0,49.0
 
1
+ name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
2
+ Baichuan2-13B-Chat,29.0,47.0,27.0,43.0
3
+ Chatglm3-6B,29.0,36.0,29.0,34.0
4
+ Devops-Model-14B-Chat,28.0,33.0,44.0,46.0
5
+ Ernie-Bot-4.0,44.0,48.0,47.0,51.0
6
+ Gpt-3.5-Turbo,36.0,42.0,40.0,48.0
7
+ GPT-4,51.0,53.0,60.0,59.0
8
+ Internlm2-Chat-20B,41.0,,44.0,
9
+ Internlm2-Chat-7B,43.0,39.0,45.0,35.0
10
+ Llama-2-13B,28.0,45.0,40.0,43.0
11
+ Llama-2-70B-Chat,1.0,47.0,29.0,46.0
12
+ Llama-2-7B,18.0,35.0,22.0,28.0
13
+ Mistral-7B,6.0,42.0,11.0,44.0
14
+ Qwen-14B-Chat,36.0,41.0,40.0,43.0
15
+ Qwen-72B-Chat,46.0,44.0,45.0,61.0
16
+ Yi-34B-Chat,40.0,40.0,42.0,42.0
17
+ Claude-3-Opus,61.71875,,,
18
+ gemma_2b,25.0,32.0,24.0,30.0
19
+ gemma_7b,22.0,44.0,28.0,40.0
20
+ Meta-Llama-3-8B-Instruct,39.670138888888886,37.58680555555556,30.381944444444443,33.072916666666664
21
+ Qwen1.5-14B-Base,38.0,39.0,48.0,36.0
22
+ Qwen1.5-14B-Chat,34.0,43.0,39.0,49.0
data_v2/zjyd_zh_mc_gen.csv CHANGED
@@ -1,29 +1,29 @@
1
- name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
- Baichuan-13B-Chat,11.04,11.13,26.92,28.61,14.35,13.22,31.69,33.97
3
- Chatglm2-6B,23.09,23.12,24.22,24.08,30.46,30.46,35.97,35.9
4
- Chatglm3-6B,32.6,32.6,35.4,35.4,28.3,28.3,40.9,40.9
5
- Chinese-Alpaca-2-13B,22.69,22.69,24.59,24.59,40.52,40.52,40.73,40.73
6
- Chinese-Llama-2-13B,17.98,17.98,17.83,17.83,31.66,31.66,36.24,36.24
7
- Devops-Model-14B-Chat,41.04,42.7,48.71,53.57,56.85,57.25,51.3,54.29
8
- Ernie-Bot-4.0,45.99,45.99,48.98,48.98,46.0,46.0,54.0,54.0
9
- Glm3-Turbo,43.0,43.0,,,,,,
10
- Glm4,50.0,50.0,,,,,,
11
- Gpt-3.5-Turbo,37.06,36.83,37.56,39.25,39.42,39.77,41.96,42.15
12
- Gpt-4,,,57.35,62.11,,,61.2,65.68
13
- Internlm-7B,27.81,27.81,19.95,19.95,24.18,24.18,35.35,35.35
14
- Internlm2-Chat-20B,44.6,44.6,47.0,47.0,62.2,62.2,38.3,38.3
15
- Internlm2-Chat-7B,38.8,38.8,44.6,44.6,46.0,46.0,35.8,35.8
16
- Llama-2-13B,25.43,27.16,29.17,29.99,36.56,36.15,37.7,39.02
17
- Llama-2-70B-Chat,24.38,24.38,43.63,43.63,44.65,44.65,48.84,48.84
18
- Llama-2-7B,24.09,23.47,28.69,29.26,29.94,30.03,31.35,31.93
19
- Mistral-7B,1.27,1.27,42.05,42.05,30.72,30.72,46.44,46.44
20
- Qwen-14B-Chat,41.71,41.44,45.58,47.98,53.52,49.92,54.72,58.85
21
- Qwen-72B-Chat,64.79,64.79,65.79,65.72,70.19,70.19,68.31,68.38
22
- Qwen-7B-Chat,36.28,36.5,33.18,33.51,41.58,40.59,31.48,31.46
23
- Yi-34B-Chat,64.91,64.58,62.77,65.51,70.85,70.92,48.77,47.97
24
- Claude-3-Opus,,68.05555555555556,,,,,,
25
- gemma_2b,25.6,25.6,28.3,28.3,19.1,19.1,35.5,35.5
26
- gemma_7b,27.3,27.3,35.4,35.4,17.3,17.3,44.5,44.5
27
- Meta-Llama-3-8B-Instruct,,63.425925925925924,,0.0,,66.2037037037037,,25.0
28
- Qwen1.5-14B-Base,49.1,49.1,49.9,49.9,62.5,62.5,41.3,41.3
29
- Qwen1.5-14B-Chat,38.6,38.9,48.8,50.5,54.6,55.2,52.1,52.7
 
1
+ name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
2
+ Baichuan-13B-Chat,11.13,28.61,13.22,33.97
3
+ Chatglm2-6B,23.12,24.08,30.46,35.9
4
+ Chatglm3-6B,32.6,35.4,28.3,40.9
5
+ Chinese-Alpaca-2-13B,22.69,24.59,40.52,40.73
6
+ Chinese-Llama-2-13B,17.98,17.83,31.66,36.24
7
+ Devops-Model-14B-Chat,42.7,53.57,57.25,54.29
8
+ Ernie-Bot-4.0,45.99,48.98,46.0,54.0
9
+ Glm3-Turbo,43.0,,,
10
+ Glm4,50.0,,,
11
+ Gpt-3.5-Turbo,36.83,39.25,39.77,42.15
12
+ Gpt-4,,62.11,,65.68
13
+ Internlm-7B,27.81,19.95,24.18,35.35
14
+ Internlm2-Chat-20B,44.6,47.0,62.2,38.3
15
+ Internlm2-Chat-7B,38.8,44.6,46.0,35.8
16
+ Llama-2-13B,27.16,29.99,36.15,39.02
17
+ Llama-2-70B-Chat,24.38,43.63,44.65,48.84
18
+ Llama-2-7B,23.47,29.26,30.03,31.93
19
+ Mistral-7B,1.27,42.05,30.72,46.44
20
+ Qwen-14B-Chat,41.44,47.98,49.92,58.85
21
+ Qwen-72B-Chat,64.79,65.72,70.19,68.38
22
+ Qwen-7B-Chat,36.5,33.51,40.59,31.46
23
+ Yi-34B-Chat,64.58,65.51,70.92,47.97
24
+ Claude-3-Opus,68.05555555555556,,,
25
+ gemma_2b,25.6,28.3,19.1,35.5
26
+ gemma_7b,27.3,35.4,17.3,44.5
27
+ Meta-Llama-3-8B-Instruct,63.425925925925924,0.0,66.2037037037037,25.0
28
+ Qwen1.5-14B-Base,49.1,49.9,62.5,41.3
29
+ Qwen1.5-14B-Chat,38.9,50.5,55.2,52.7
data_v2/zte_en_mc_gen.csv CHANGED
@@ -1,28 +1,28 @@
1
- name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
- Baichuan-13B-Chat,11.6,14.31,14.68,18.46,14.56,15.68,16.21,16.82
3
- Chatglm2-6B,15.94,16.06,19.83,19.91,26.27,26.22,28.25,28.37
4
- Chatglm3-6B,30.4,30.4,30.7,30.7,26.9,26.9,37.2,37.2
5
- Chinese-Alpaca-2-13B,20.86,20.86,23.08,23.08,29.75,29.75,32.83,32.83
6
- Chinese-Llama-2-13B,10.02,10.02,19.51,19.51,34.51,34.51,33.34,33.34
7
- Devops-Model-14B-Chat,31.04,30.51,42.84,47.37,52.25,49.38,45.9,47.23
8
- Ernie-Bot-4.0,43.66,43.66,51.99,51.99,44.0,44.0,50.0,50.0
9
- Gpt-3.5-Turbo,35.04,34.82,38.46,43.5,39.29,39.19,41.01,42.58
10
- Gpt-4,,,56.9,65.49,,,59.39,63.54
11
- Internlm-7B,20.48,20.48,23.85,23.85,23.69,23.69,26.06,26.06
12
- Internlm2-Chat-20B,39.1,39.1,37.7,37.7,47.7,47.7,33.5,33.5
13
- Internlm2-Chat-7B,36.8,36.8,31.7,31.7,46.3,46.3,36.9,36.9
14
- Llama-2-13B,15.62,18.32,29.88,34.45,23.16,29.14,37.59,44.3
15
- Llama-2-70B-Chat,23.64,23.64,39.31,39.31,38.98,39.12,47.9,47.9
16
- Llama-2-7B,19.42,21.62,25.46,27.11,21.45,24.85,33.6,34.83
17
- Mistral-7B,26.91,26.91,30.65,30.65,40.52,40.52,46.84,46.84
18
- Qwen-14B-Chat,33.71,36.25,41.24,42.51,51.19,50.39,57.18,59.18
19
- Qwen-72B-Chat,53.19,53.19,55.25,55.52,58.13,58.13,58.72,58.99
20
- Qwen-7B-Chat,33.37,33.74,32.97,34.1,32.98,32.7,36.6,36.65
21
- Yi-34B-Chat,38.24,37.04,48.24,52.1,61.33,61.19,53.53,53.39
22
- Claude-3-Opus,,49.599999999999994,,,,,,
23
- gemma_2b,20.1,20.1,24.2,24.2,31.2,31.2,35.5,35.5
24
- gemma_7b,23.1,23.1,34.4,34.4,21.4,21.4,33.1,33.1
25
- Meta-Llama-3-70B-Instruct,,38.9,,63.4,,37.6,,59.0
26
- Meta-Llama-3-8B-Instruct,,24.7,,35.4,,19.7,,32.9
27
- Qwen1.5-14B-Base,34.0,34.0,42.8,42.8,57.9,57.9,40.2,40.2
28
- Qwen1.5-14B-Chat,34.5,35.6,41.7,41.1,33.2,34.7,46.2,47.4
 
1
+ name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
2
+ Baichuan-13B-Chat,14.31,18.46,15.68,16.82
3
+ Chatglm2-6B,16.06,19.91,26.22,28.37
4
+ Chatglm3-6B,30.4,30.7,26.9,37.2
5
+ Chinese-Alpaca-2-13B,20.86,23.08,29.75,32.83
6
+ Chinese-Llama-2-13B,10.02,19.51,34.51,33.34
7
+ Devops-Model-14B-Chat,30.51,47.37,49.38,47.23
8
+ Ernie-Bot-4.0,43.66,51.99,44.0,50.0
9
+ Gpt-3.5-Turbo,34.82,43.5,39.19,42.58
10
+ Gpt-4,,65.49,,63.54
11
+ Internlm-7B,20.48,23.85,23.69,26.06
12
+ Internlm2-Chat-20B,39.1,37.7,47.7,33.5
13
+ Internlm2-Chat-7B,36.8,31.7,46.3,36.9
14
+ Llama-2-13B,18.32,34.45,29.14,44.3
15
+ Llama-2-70B-Chat,23.64,39.31,39.12,47.9
16
+ Llama-2-7B,21.62,27.11,24.85,34.83
17
+ Mistral-7B,26.91,30.65,40.52,46.84
18
+ Qwen-14B-Chat,36.25,42.51,50.39,59.18
19
+ Qwen-72B-Chat,53.19,55.52,58.13,58.99
20
+ Qwen-7B-Chat,33.74,34.1,32.7,36.65
21
+ Yi-34B-Chat,37.04,52.1,61.19,53.39
22
+ Claude-3-Opus,49.599999999999994,,,
23
+ gemma_2b,20.1,24.2,31.2,35.5
24
+ gemma_7b,23.1,34.4,21.4,33.1
25
+ Meta-Llama-3-70B-Instruct,38.9,63.4,37.6,59.0
26
+ Meta-Llama-3-8B-Instruct,24.7,35.4,19.7,32.9
27
+ Qwen1.5-14B-Base,34.0,42.8,57.9,40.2
28
+ Qwen1.5-14B-Chat,35.6,41.1,34.7,47.4
data_v2/zte_zh_mc_gen.csv CHANGED
@@ -1,30 +1,30 @@
1
- name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
- Baichuan-13B-Chat,11.04,11.13,26.92,28.61,14.35,13.22,31.69,33.97
3
- Chatglm2-6B,23.09,23.12,24.22,24.08,30.46,30.46,35.97,35.9
4
- Chatglm3-6B,32.6,32.6,35.4,35.4,28.3,28.3,40.9,40.9
5
- Chinese-Alpaca-2-13B,22.69,22.69,24.59,24.59,40.52,40.52,40.73,40.73
6
- Chinese-Llama-2-13B,17.98,17.98,17.83,17.83,31.66,31.66,36.24,36.24
7
- Devops-Model-14B-Chat,41.04,42.7,48.71,53.57,56.85,57.25,51.3,54.29
8
- Ernie-Bot-4.0,45.99,45.99,48.98,48.98,46.0,46.0,54.0,54.0
9
- Glm3-Turbo,43.0,43.0,,,,,,
10
- Glm4,50.0,50.0,,,,,,
11
- Gpt-3.5-Turbo,37.06,36.83,37.56,39.25,39.42,39.77,41.96,42.15
12
- Gpt-4,,,57.35,62.11,,,61.2,65.68
13
- Internlm-7B,27.81,27.81,19.95,19.95,24.18,24.18,35.35,35.35
14
- Internlm2-Chat-20B,44.6,44.6,47.0,47.0,62.2,62.2,38.3,38.3
15
- Internlm2-Chat-7B,38.8,38.8,44.6,44.6,46.0,46.0,35.8,35.8
16
- Llama-2-13B,25.43,27.16,29.17,29.99,36.56,36.15,37.7,39.02
17
- Llama-2-70B-Chat,24.38,24.38,43.63,43.63,44.65,44.65,48.84,48.84
18
- Llama-2-7B,24.09,23.47,28.69,29.26,29.94,30.03,31.35,31.93
19
- Mistral-7B,1.27,1.27,42.05,42.05,30.72,30.72,46.44,46.44
20
- Qwen-14B-Chat,41.71,41.44,45.58,47.98,53.52,49.92,54.72,58.85
21
- Qwen-72B-Chat,64.79,64.79,65.79,65.72,70.19,70.19,68.31,68.38
22
- Qwen-7B-Chat,36.28,36.5,33.18,33.51,41.58,40.59,31.48,31.46
23
- Yi-34B-Chat,64.91,64.58,62.77,65.51,70.85,70.92,48.77,47.97
24
- Claude-3-Opus,,51.4,,,,,,
25
- gemma_2b,25.6,25.6,28.3,28.3,19.1,19.1,35.5,35.5
26
- gemma_7b,27.3,27.3,35.4,35.4,17.3,17.3,44.5,44.5
27
- Meta-Llama-3-70B-Instruct,,31.1,,37.4,,51.10000000000001,,36.900000000000006
28
- Meta-Llama-3-8B-Instruct,,31.1,,34.3,,36.0,,37.1
29
- Qwen1.5-14B-Base,49.1,49.1,49.9,49.9,62.5,62.5,41.3,41.3
30
- Qwen1.5-14B-Chat,38.6,38.9,48.8,50.5,54.6,55.2,52.1,52.7
 
1
+ name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
2
+ Baichuan-13B-Chat,11.13,28.61,13.22,33.97
3
+ Chatglm2-6B,23.12,24.08,30.46,35.9
4
+ Chatglm3-6B,32.6,35.4,28.3,40.9
5
+ Chinese-Alpaca-2-13B,22.69,24.59,40.52,40.73
6
+ Chinese-Llama-2-13B,17.98,17.83,31.66,36.24
7
+ Devops-Model-14B-Chat,42.7,53.57,57.25,54.29
8
+ Ernie-Bot-4.0,45.99,48.98,46.0,54.0
9
+ Glm3-Turbo,43.0,,,
10
+ Glm4,50.0,,,
11
+ Gpt-3.5-Turbo,36.83,39.25,39.77,42.15
12
+ Gpt-4,,62.11,,65.68
13
+ Internlm-7B,27.81,19.95,24.18,35.35
14
+ Internlm2-Chat-20B,44.6,47.0,62.2,38.3
15
+ Internlm2-Chat-7B,38.8,44.6,46.0,35.8
16
+ Llama-2-13B,27.16,29.99,36.15,39.02
17
+ Llama-2-70B-Chat,24.38,43.63,44.65,48.84
18
+ Llama-2-7B,23.47,29.26,30.03,31.93
19
+ Mistral-7B,1.27,42.05,30.72,46.44
20
+ Qwen-14B-Chat,41.44,47.98,49.92,58.85
21
+ Qwen-72B-Chat,64.79,65.72,70.19,68.38
22
+ Qwen-7B-Chat,36.5,33.51,40.59,31.46
23
+ Yi-34B-Chat,64.58,65.51,70.92,47.97
24
+ Claude-3-Opus,51.4,,,
25
+ gemma_2b,25.6,28.3,19.1,35.5
26
+ gemma_7b,27.3,35.4,17.3,44.5
27
+ Meta-Llama-3-70B-Instruct,31.1,37.4,51.10000000000001,36.900000000000006
28
+ Meta-Llama-3-8B-Instruct,31.1,34.3,36.0,37.1
29
+ Qwen1.5-14B-Base,49.1,49.9,62.5,41.3
30
+ Qwen1.5-14B-Chat,38.9,50.5,55.2,52.7