name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con Baichuan2-13B-Chat,31.0,29.0,47.0,47.0,29.0,27.0,40.0,43.0 ChatGLM3-6B,29.0,29.0,36.0,36.0,29.0,29.0,34.0,34.0 DevOps-Model-14B-Chat,27.0,28.0,36.0,33.0,46.0,44.0,44.0,46.0 ERNIE-Bot-4.0,44.0,44.0,48.0,48.0,47.0,47.0,51.0,51.0 GPT-3.5-turbo,36.0,36.0,42.0,42.0,40.0,40.0,48.0,48.0 GPT-4,51.0,51.0,53.0,53.0,60.0,60.0,59.0,59.0 InternLM2-Chat-20B,41.0,41.0,,,44.0,44.0,, InternLM2-Chat-7B,43.0,43.0,39.0,39.0,45.0,45.0,35.0,35.0 LLaMA-2-13B,28.0,28.0,45.0,45.0,40.0,40.0,43.0,43.0 LLaMA-2-70B-Chat,1.0,1.0,47.0,47.0,29.0,29.0,46.0,46.0 LLaMA-2-7B,18.0,18.0,35.0,35.0,22.0,22.0,28.0,28.0 Mistral-7B,6.0,6.0,42.0,42.0,11.0,11.0,44.0,44.0 Qwen-14B-Chat,36.0,36.0,39.0,41.0,44.0,40.0,47.0,43.0 Qwen-72B-Chat,46.0,46.0,44.0,44.0,45.0,45.0,61.0,61.0 Yi-34B-Chat,40.0,40.0,40.0,40.0,42.0,42.0,42.0,42.0 Claude-3-Opus,61.71875,61.71875,,,,,, Deepseek-R1-Distill-Llama-8B,34.89583333333333,34.89583333333333,36.02430555555556,36.02430555555556,25.260416666666664,25.260416666666664,36.63194444444444,36.63194444444444 Deepseek-R1-Distill-Qwen-1.5B,23.4375,23.4375,25.78125,25.78125,16.40625,16.40625,32.11805555555556,32.11805555555556 Deepseek-R1-Distill-Qwen-7B,25.43402777777778,25.43402777777778,28.993055555555557,28.993055555555557,23.697916666666664,23.697916666666664,32.89930555555556,32.89930555555556 Gemma-2B,25.0,25.0,32.0,32.0,24.0,24.0,30.0,30.0 Gemma-7B,22.0,22.0,44.0,44.0,28.0,28.0,40.0,40.0 Meta-Llama-3-8B-Instruct,39.670138888888886,39.670138888888886,37.58680555555556,37.58680555555556,30.381944444444443,30.381944444444443,33.072916666666664,33.072916666666664 Qwen1.5-14B-Base,38.0,38.0,39.0,39.0,48.0,48.0,36.0,36.0 Qwen1.5-14B-Chat,34.0,34.0,45.0,43.0,42.0,39.0,48.0,49.0