OpsEval / data_v2 /oracle_zh_mc_gen.csv
Junetheriver's picture
update 05-15
32e04fa
raw
history blame
1.89 kB
name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
Aquilachat2-34B,34.66,34.66,47.74,47.74,44.48,44.48,,
Baichuan-13B-Chat,12.88,12.07,25.96,27.57,18.91,19.52,27.97,30.58
Baichuan2-13B-Chat,25.7,25.5,20.1,21.3,27.7,26.7,22.7,24.7
Chatglm2-6B,23.34,23.34,24.35,24.14,22.94,22.94,26.16,26.16
Chatglm3-6B,21.32796781,21.32796781,28.97384306,28.97384306,21.73038229,21.73038229,29.57746479,29.57746479
Chinese-Alpaca-2-13B,22.94,22.94,25.75,25.75,25.15,25.15,22.33,22.33
Chinese-Llama-2-13B,14.69,14.69,19.92,19.92,19.72,19.72,20.93,20.93
Devops-Model-14B-Chat,24.75,22.74,28.37,27.77,36.62,37.02,27.57,26.36
Ernie-Bot-4.0,48.56,48.56,50.64,50.64,48.0,48.0,54.0,54.0
Gpt-3.5-Turbo,36.42,35.81,39.24,43.26,39.84,39.44,27.16,27.77
Gpt-4,,,59.38,65.17,,,44.06,48.09
Internlm-7B,25.96,25.96,25.96,25.96,29.18,29.18,28.37,28.37
Internlm2-Chat-7B,28.57142857,28.57142857,31.79074447,31.79074447,30.78470825,30.78470825,31.18712274,31.18712274
Llama-2-13B,23.94,24.35,29.58,31.99,24.55,26.76,21.13,20.72
Llama-2-70B-Chat,15.29,15.29,34.81,34.81,26.76,26.76,33.8,33.8
Llama-2-7B,20.72,20.72,27.16,27.97,21.53,18.51,18.31,17.91
Mistral-7B,1.9,1.9,45.61,45.61,15.0,15.0,35.97,35.97
Qwen-14B-Chat,27.57,27.57,32.39,36.02,40.04,35.41,30.38,33.4
Qwen-72B-Chat,48.29,48.49,49.5,49.7,49.7,49.7,45.27,44.87
Qwen-7B-Chat,18.51,17.71,27.36,28.37,29.78,29.58,33.6,31.79
Yi-34B-Chat,49.9,49.3,52.72,53.72,56.34,56.34,51.31,54.33
gemma_2b,18.51107,18.51107,24.9497,24.9497,21.52918,21.52918,27.7666,27.7666
gemma_7b,19.3159,19.3159,53.94737,53.94737,18.51107,18.51107,5.204461,5.204461
Qwen1.5-14B-Base,20.92555,33.91785690993282,35.61368,27.773429857170807,41.44869,41.359323028761494,30.78471,32.62733972477663
Qwen1.5-14B-Chat,24.14487,20.92555,40.64386,35.61368,38.22938,41.44869,39.43662,30.78471
Qwen1.5-14B-Chat,,23.34004,,41.04628,,38.02817,,40.04024