-,-,Zero-shot,-,v,-,3-Shot,-,-,-,- -,-,Naïve,SC,CoT,CoT+SC,Naïve,SC,CoT,CoT+SC,Best Score 1,✨ GPT-4,/,/,/,/,/,/,86,86,86 2,✨ ERNIE-Bot-4.0,67.54,67.54,71.96,71.96,72,72,78,78,78 3,✨ Yi-34B-Chat,61.61,62.56,68.11,69.75,65.73,65.37,69.88,71.21,71.21 4,✨ Qwen-72B-Chat,65.77,65.86,68.13,68.3,69.4,69.4,69.99,70.08,70.08 5,✨ Hunyuan-13B,60,60,70,70,,,,,70 6,✨ GPT-3.5-turbo,58.4,58.6,64.8,67.6,59.2,59.7,65.2,67.4,67.6 7,✨ GLM4,67.38,67.38,,,,,,,67.38 8,✨ qwen1.5-14b-chat,54.04,53.87,62.56,63.86,58.78,58.09,63.43,65.58,65.58 9,✨ DevOps-Model-14B-Chat,47.59,46.57,52.52,56.01,62.07,60.08,50.59,55.79,62.07 10,✨ qwen1.5-14b-base,45.18,45.18,59.12,59.12,61.1,61.1,52.5,52.5,61.1 11,✨ InternLM2-Chat-7B,54.3,54.3,59.81,59.81,58.52,58.52,51.64,51.64,59.81 12,✨ GLM3-turbo,59.64,59.64,,,,,,,59.64 13,✨ InternLM2-Chat-20B,57.49,57.49,57.14,57.14,59.12,59.12,50.77,50.77,59.12 14,✨ Qwen-14B-Chat,48.35,48.81,55.35,57.4,58.53,56.12,52.12,54.99,58.53 15,✨ LLaMA-2-70B-Chat,38.55,38.55,57.49,57.49,49.09,49.09,48.57,48.57,57.49 16,✨ LLaMA-2-13B,29.7,31.6,51.6,57,39.6,38.9,48,50.6,57 17,✨ Baichuan-13B-Chat,15.2,16,43.9,49.7,34.3,36.1,51.3,55.6,55.6 18,✨ LLaMA-2-7B,29.8,30.2,50.1,55.6,38.6,40.8,45.6,50.4,55.6 19,✨ Qwen-7B-Chat,29.6,29.9,50.6,53.5,50.4,46.9,46.9,47.7,53.5 20,✨ ChatGLM3-6B,41.39,41.39,49.23,49.23,38.81,38.81,42.86,42.86,49.23 21,✨ gemma_7b,31.58,31.58,47.59,47.59,34.68,34.68,48.88,48.88,48.88 22,✨ AquilaChat2-34B,34.66,34.66,47.74,47.74,44.48,44.48,NULL,NULL,47.74 23,✨ Mistral-7B,1.9,1.9,45.61,45.61,15,15,35.97,35.97,45.61 24,✨ Chinese-Alpaca-2-13B,33.1,33.1,44.2,44.2,44,44,42.7,42.7,44.2 25,✨ InternLM-7B,41.7,41.7,38.4,38.4,42.6,42.6,41.3,41.3,42.6 26,✨ ChatGLM2-6B,33.8,33.7,42.1,42.2,36,36,39.5,39.5,42.2 27,✨ Chinese-LLaMA-2-13B,22.5,22.5,38.8,38.8,41.8,41.8,32.2,32.2,41.8 28,✨ gemma_2b,29.69,29.69,39.16,39.16,29.78,29.78,38.64,38.64,39.16 29,✨ Baichuan2-13B-Chat,35.6,35.9,30.5,30.5,34.6,35.6,30.2,32,35.9