Grounding ⚡️,Instruction Following 📝,Planning 📅,Reasoning 💡,Refinement 🔩,Safety ⚠️,Theory of Mind 🤔,Tool Usage 🛠️,Multilingual 🇬🇫,Model 🤗,Model Params (B),Model Type,Average 4.012,4.21,4.029,4.01,4.034,4.449,4.09,3.6,3.429,gpt-4-1106-preview,,Proprietary,4.054 4.112,4.13,3.929,4.15,4.0,4.145,4.15,3.725,3.329,gpt-4-0125-preview,,Proprietary,4.043 4.175,4.14,4.1,3.98,3.789,4.235,4.06,3.788,3.414,gpt-4o-2024-05-13,,Proprietary,4.033 4.112,4.09,3.986,3.92,3.862,4.116,4.06,3.688,3.357,gpt-4-turbo-2024-04-09,,Proprietary,3.979 4.075,3.88,4.157,3.8,3.741,4.435,4.05,3.425,3.357,claude-3-opus-20240229,,Proprietary,3.945 4.175,3.92,3.971,3.76,3.741,4.029,3.97,3.625,3.114,meta-llama/Meta-Llama-3-70B-Instruct,70.0,Chat,3.899 4.075,4.03,4.0,3.83,3.776,4.13,3.96,3.325,2.771,qwen/qwen-110b-chat,110.0,Chat,3.891 3.862,3.83,3.943,3.84,3.69,4.29,3.86,3.5,3.043,claude-3-sonnet-20240229,,Proprietary,3.852 4.0,3.94,3.957,3.58,3.569,4.275,3.93,3.538,2.871,claude-3-haiku-20240307,,Proprietary,3.849 3.925,3.91,3.843,3.82,3.552,4.116,3.91,3.688,2.971,mistral-medium,,Proprietary,3.845 3.875,3.88,3.871,3.83,3.5,4.145,4.01,3.288,3.1,gemini-pro-1.5,,Proprietary,3.8 3.9,3.83,3.757,3.66,3.638,3.957,3.94,3.712,2.871,mistral-large,,Proprietary,3.799 4.05,3.81,3.743,3.81,3.31,4.145,3.97,3.45,2.729,google/gemini-flash-1.5,,Proprietary,3.786 3.925,4.02,3.857,3.46,3.517,3.928,3.91,3.425,2.829,alpindale/c4ai-command-r-plus-GPTQ,104,Chat,3.755 3.712,3.92,3.771,3.53,3.586,4.101,3.92,3.425,2.629,Qwen/Qwen1.5-72B-Chat,72.0,Chat,3.746 3.812,3.96,3.771,3.6,3.379,4.043,3.84,3.45,2.757,MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-AWQ,141,Chat,3.732 3.9,3.85,3.486,3.54,3.776,4.232,3.81,3.062,1.971,microsoft/Phi-3-mini-4k-instruct,3.8,Chat,3.707 3.775,3.86,3.8,3.44,3.534,3.986,3.91,3.325,2.429,Starling-LM-7B-beta,7.0,Chat,3.704 3.65,3.85,3.643,3.55,3.121,4.246,3.8,3.488,2.671,Qwen/Qwen1.5-32B-Chat,32.0,Chat,3.668 3.85,3.75,3.814,3.3,3.345,3.928,3.71,3.362,3.043,meta-llama/Meta-Llama-3-8B-Instruct,8.0,Chat,3.632 3.65,3.89,3.571,3.45,3.138,4.014,3.78,3.2,2.743,mistralai/Mixtral-8x7B-Instruct-v0.1,46.7,Chat,3.587 3.8,3.86,3.757,3.43,3.259,3.957,3.64,2.988,2.586,gpt-3.5-turbo-0125,,Proprietary,3.586 3.812,3.75,3.714,3.41,3.241,4.087,3.65,3.0,2.586,gpt-3.5-turbo-1106,,Proprietary,3.583 3.638,3.8,3.8,3.17,3.155,3.826,3.7,3.5,2.4,allenai/tulu-2-dpo-70b,70.0,Chat,3.574 3.662,3.84,3.671,3.24,3.155,3.783,3.71,3.338,2.529,NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO,46.7,Chat,3.55 3.7,3.8,3.586,3.21,3.034,3.826,3.7,3.488,2.586,upstage/SOLAR-10.7B-Instruct-v1.0,10.7,Chat,3.543 3.462,3.74,3.714,3.27,3.414,4.087,3.81,2.812,2.014,01-ai/Yi-34B-Chat,34.0,Chat,3.539 3.588,3.66,3.471,3.66,3.345,3.942,3.7,2.912,1.814,microsoft/Phi-3-mini-128k-instruct,3.8,Chat,3.535 3.612,3.72,3.657,2.98,3.155,4.464,3.79,2.888,2.429,meta-llama/Llama-2-70b-chat-hf,70.0,Chat,3.533 3.712,3.72,3.643,3.14,3.19,4.014,3.88,2.95,1.957,CohereForAI/c4ai-command-r-v01,35.0,Chat,3.531 3.562,3.65,3.629,3.48,3.069,3.884,3.74,3.062,2.986,gemini-1.0-pro,,Proprietary,3.51 3.588,3.77,3.614,3.26,3.121,3.884,3.5,3.062,2.486,Qwen/Qwen1.5-14B-Chat,14.0,Chat,3.475 3.688,3.69,3.629,3.16,3.103,3.652,3.59,3.225,2.414,NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT,46.7,Chat,3.467 3.525,3.76,3.514,3.26,3.31,3.841,3.61,2.888,2.314,openchat/openchat-3.5-0106,7.0,Chat,3.463 3.688,3.74,3.6,3.01,3.103,3.957,3.49,3.012,2.6,mistralai/Mistral-7B-Instruct-v0.2,7.0,Chat,3.45 3.712,3.74,3.5,3.2,2.948,3.942,3.53,2.838,2.129,Starling-LM-7B-alpha,7.0,Chat,3.426 3.525,3.59,3.5,3.44,3.207,3.942,3.37,2.762,,mistral-community/Mixtral-8x22B-v0.1-AWQ,141,Base,3.417 3.288,3.62,3.686,3.25,3.345,3.551,3.45,3.062,2.543,MaziyarPanahi/zephyr-orpo-141b-A35b-v0.1-AWQ,141,Chat,3.406 3.4,3.74,3.4,3.04,3.0,3.754,3.71,2.975,2.043,Qwen/Qwen1.5-7B-Chat,7.0,Chat,3.377 3.2,3.63,3.557,3.24,3.207,3.609,3.55,2.85,1.9,NousResearch/Nous-Hermes-2-Yi-34B,34.0,Chat,3.355 3.488,3.56,3.314,3.12,3.052,4.072,3.44,2.675,2.029,google/gemma-1.1-7b-it,7.0,Chat,3.34 3.462,3.66,3.429,2.97,2.931,3.899,3.54,2.812,2.129,kaist-ai/mistral-orpo-beta,7.0,Chat,3.338 3.588,3.7,3.343,2.71,2.862,4.319,3.66,2.512,2.343,meta-llama/Llama-2-13b-chat-hf,13.0,Chat,3.337 3.388,3.56,3.443,2.86,3.103,4.029,3.45,2.825,2.114,kaist-ai/mistral-orpo-alpha,7.0,Chat,3.332 3.438,3.58,3.629,3.05,3.172,3.319,3.46,2.925,2.214,NousResearch/Nous-Hermes-2-Mistral-7B-DPO,7.0,Chat,3.322 3.388,3.4,3.414,3.01,3.138,3.725,3.43,3.075,2.014,allenai/codetulu-2-34b,34.0,Chat,3.322 3.575,3.53,3.557,3.07,3.172,3.304,3.42,2.875,2.243,teknium/OpenHermes-2.5-Mistral-7B,7.0,Chat,3.313 3.412,3.58,3.457,2.71,3.034,3.884,3.55,2.775,2.229,allenai/tulu-2-dpo-13b,13.0,Chat,3.3 3.375,3.56,3.5,3.0,2.897,3.522,3.5,3.05,1.957,HuggingFaceH4/zephyr-7b-beta,7.0,Chat,3.3 3.438,3.62,3.371,2.64,2.741,4.261,3.58,2.175,2.086,meta-llama/Llama-2-7b-chat-hf,7.0,Chat,3.228 3.55,3.45,3.186,3.14,2.759,3.812,3.33,2.538,,mistralai/Mixtral-8x7B-v0.1,46.7,Base,3.22 3.25,3.55,3.643,2.89,2.845,3.493,3.32,2.638,1.971,teknium/OpenHermes-2-Mistral-7B,7.0,Chat,3.203 3.375,3.41,3.114,2.97,2.914,3.899,3.17,2.762,,Qwen/Qwen1.5-72B,72.0,Base,3.202 3.35,3.39,3.286,2.85,2.724,4.101,3.37,2.5,2.186,codellama/CodeLlama-34b-Instruct-hf,34.0,Chat,3.196 3.25,3.67,3.243,2.68,2.707,3.768,3.51,2.325,1.986,allenai/tulu-2-dpo-7b,7.0,Chat,3.144 3.488,3.37,3.186,3.05,2.879,3.681,3.21,2.162,,01-ai/Yi-34B,34.0,Base,3.128 3.288,3.49,3.1,2.78,2.759,3.855,3.17,2.45,,meta-llama/Llama-2-70b-hf,70.0,Base,3.111 3.125,3.52,3.143,2.99,2.81,3.536,3.07,2.638,,Qwen/Qwen1.5-32B,32.0,Base,3.104 3.088,3.37,3.057,2.62,2.793,3.42,3.22,2.988,1.8,allenai/codetulu-2-13b,13.0,Chat,3.069 3.012,3.31,3.271,2.68,2.707,3.841,3.2,2.325,2.057,allenai/tulu-2-13b,13.0,Chat,3.043 3.088,3.37,3.114,2.75,2.759,3.565,3.25,2.225,,upstage/SOLAR-10.7B-v1.0,10.7,Base,3.015 3.15,3.34,2.814,2.91,2.828,3.652,3.17,2.2,1.657,google/gemma-7b-it,7.0,Chat,3.008 3.038,3.2,3.157,2.59,2.483,3.971,3.21,2.312,2.157,codellama/CodeLlama-13b-Instruct-hf,13.0,Chat,2.995 3.138,3.18,3.029,2.58,2.586,3.826,3.19,2.212,1.7,codellama/CodeLlama-7b-Instruct-hf,7.0,Chat,2.968 3.0,3.45,3.129,2.49,2.603,3.507,3.56,1.888,1.529,01-ai/Yi-6B-Chat,6.0,Chat,2.953 2.912,3.29,3.029,2.55,2.707,4.13,3.25,1.675,1.657,google/gemma-1.1-2b-it,2.0,Chat,2.943 3.388,3.3,2.914,2.72,2.862,2.623,3.06,2.55,,Qwen/Qwen1.5-14B,14.0,Base,2.927 3.25,3.22,2.786,2.76,2.69,3.261,2.92,2.312,,meta-llama/Meta-Llama-3-70B,70.0,Base,2.9 2.8,3.18,3.0,2.49,2.724,3.348,3.12,2.525,1.829,allenai/codetulu-2-7b,7.0,Chat,2.898 2.85,3.21,3.1,2.56,2.517,3.681,3.12,2.0,1.729,allenai/tulu-2-7b,7.0,Chat,2.88 2.8,3.09,2.971,2.36,2.638,4.043,3.12,1.75,1.686,google/gemma-2b-it,2.0,Chat,2.847 2.938,3.23,2.914,2.68,2.466,3.406,2.9,1.975,,mistralai/Mistral-7B-v0.1,7.0,Base,2.814 2.8,3.1,2.871,2.53,2.862,3.348,3.0,1.938,1.471,Qwen/Qwen1.5-4B-Chat,4.0,Chat,2.806 2.95,3.44,2.971,2.33,2.414,3.072,3.19,1.988,1.4,allenai/OLMo-7B-Instruct,7.0,Chat,2.794 3.025,3.24,2.786,2.58,2.483,3.203,3.07,1.862,,mistral-community/Mistral-7B-v0.2,7.0,Base,2.781 2.962,2.75,2.714,2.69,2.569,3.435,2.98,1.65,,microsoft/phi-2,2.7,Base,2.719 2.862,3.13,2.886,2.33,2.259,3.507,2.95,1.725,1.229,allenai/OLMo-7B-SFT,7.0,Chat,2.706 2.925,2.51,2.386,2.62,2.448,4.217,2.56,1.738,1.757,codellama/CodeLlama-70b-Instruct-hf,70.0,Chat,2.675 2.938,3.0,2.843,2.37,2.414,3.072,2.58,2.175,,Qwen/Qwen1.5-7B,7.0,Base,2.674 2.85,3.11,2.643,2.24,2.517,2.725,3.11,1.662,1.329,Qwen/Qwen1.5-1.8B-Chat,1.8,Chat,2.607 2.838,2.8,2.5,2.53,2.276,2.884,2.61,1.775,,EleutherAI/llemma_34b,34.0,Base,2.527 2.762,3.01,2.6,2.15,2.138,3.217,2.65,1.512,,meta-llama/Llama-2-13b-hf,13.0,Base,2.505 2.788,2.89,2.443,2.23,2.155,3.275,2.51,1.675,,Qwen/Qwen1.5-4B,4.0,Base,2.496 2.975,2.81,2.314,2.27,2.362,2.913,2.64,1.65,,meta-llama/Meta-Llama-3-8B,8.0,Base,2.492 2.775,2.76,2.557,2.3,2.052,3.043,2.74,1.412,,01-ai/Yi-6B,6.0,Base,2.455 2.888,2.47,1.629,2.13,2.017,2.826,2.8,2.05,1.971,microsoft/Orca-2-13b,13.0,Chat,2.351 2.75,2.42,2.329,2.32,1.966,2.696,2.23,2.025,,codellama/CodeLlama-70b-hf,70.0,Base,2.342 2.45,2.84,2.257,2.12,2.172,2.913,2.62,1.275,,microsoft/phi-1_5,1.3,Base,2.331 2.462,2.87,2.257,2.05,1.793,3.159,2.4,1.262,,meta-llama/Llama-2-7b-hf,7.0,Base,2.282 2.675,2.41,2.129,1.98,2.069,2.594,2.45,1.8,,codellama/CodeLlama-34b-hf,34.0,Base,2.263 2.425,2.7,2.229,1.81,2.086,2.449,2.38,1.35,,Qwen/Qwen1.5-1.8B,1.8,Base,2.179 2.25,2.65,2.086,1.94,1.862,2.638,2.31,1.288,,google/gemma-2b,2.0,Base,2.128 2.238,2.46,1.829,1.97,1.897,2.522,2.03,1.612,,EleutherAI/llemma_7b,7.0,Base,2.07 2.075,2.44,1.914,1.64,1.69,2.42,2.26,1.25,1.186,Qwen/Qwen1.5-0.5B-Chat,0.5,Chat,1.961 2.288,2.26,1.314,1.72,1.81,2.623,2.25,1.338,1.843,microsoft/Orca-2-7b,7.0,Chat,1.95 2.125,2.19,1.743,1.76,1.828,2.667,2.02,1.15,,allenai/OLMo-7B,7.0,Base,1.935 2.1,2.06,1.757,1.71,1.621,2.275,1.89,1.588,,codellama/CodeLlama-13b-hf,13.0,Base,1.875 1.75,2.05,1.471,1.59,1.534,2.261,1.79,1.375,,codellama/CodeLlama-7b-hf,7.0,Base,1.728 1.925,2.04,1.6,1.51,1.5,1.957,1.72,1.188,,Qwen/Qwen1.5-0.5B,0.5,Base,1.68 1.675,1.64,1.357,1.31,1.31,2.087,1.44,1.062,,allenai/OLMo-1B,1.0,Base,1.485 1.25,1.4,1.357,1.34,1.362,1.667,1.4,1.15,1.157,CohereForAI/aya-101,13.0,Chat,1.366 1.375,1.46,1.214,1.22,1.034,1.928,1.19,1.012,,google/gemma-7b,7.0,Base,1.304 1.038,1.01,1.0,1.0,1.017,1.377,1.0,1.012,,microsoft/phi-1,1.3,Base,1.057