[ { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.7026, "Completeness": 0.7014, "Conciseness": 0.1631, "Helpfulness": 0.6784, "Honesty": 0.6972, "Harmlessness": 0.7026, "3C3H Score": 0.6076 }, "Tasks Scores": { "Question Answering (QA)": 0.7151, "Reasoning": 0.64, "Orthographic and Grammatical Analysis": 0.0887, "Safety": 0.4729 } }, "Meta": { "Model Name": "CohereForAI/aya-expanse-32b", "License": "cc-by-nc-4.0", "Revision": "main", "Precision": "float16", "Params": 32.0, "Total Entries": 279, "Successful Entries": 278, "Failed Entries": 1, "Success Ratio": 0.9964 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.5612, "Completeness": 0.5612, "Conciseness": 0.1172, "Helpfulness": 0.5468, "Honesty": 0.5519, "Harmlessness": 0.5594, "3C3H Score": 0.4829 }, "Tasks Scores": { "Question Answering (QA)": 0.5526, "Reasoning": 0.5561, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.4271 } }, "Meta": { "Model Name": "CohereForAI/aya-expanse-8b", "License": "cc-by-nc-4.0", "Revision": "main", "Precision": "float16", "Params": 8.0, "Total Entries": 279, "Successful Entries": 278, "Failed Entries": 1, "Success Ratio": 0.9964 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.4648, "Completeness": 0.46, "Conciseness": 0.1251, "Helpfulness": 0.4415, "Honesty": 0.4495, "Harmlessness": 0.4639, "3C3H Score": 0.4008 }, "Tasks Scores": { "Question Answering (QA)": 0.5056, "Reasoning": 0.3817, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.2917 } }, "Meta": { "Model Name": "FreedomIntelligence/AceGPT-13B-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float16", "Params": 13.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.4158, "Completeness": 0.4158, "Conciseness": 0.0941, "Helpfulness": 0.3817, "Honesty": 0.3934, "Harmlessness": 0.4158, "3C3H Score": 0.3527 }, "Tasks Scores": { "Question Answering (QA)": 0.4017, "Reasoning": 0.4367, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.2104 } }, "Meta": { "Model Name": "FreedomIntelligence/AceGPT-7B-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float16", "Params": 7.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.5568, "Completeness": 0.546, "Conciseness": 0.2094, "Helpfulness": 0.5302, "Honesty": 0.5391, "Harmlessness": 0.5568, "3C3H Score": 0.4897 }, "Tasks Scores": { "Question Answering (QA)": 0.6084, "Reasoning": 0.4717, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.4083 } }, "Meta": { "Model Name": "FreedomIntelligence/AceGPT-v2-8B-Chat", "License": "apache-2.0", "Revision": "main", "Precision": "float16", "Params": 8.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.1547, "Completeness": 0.1439, "Conciseness": 0.0369, "Helpfulness": 0.116, "Honesty": 0.1286, "Harmlessness": 0.1538, "3C3H Score": 0.1223 }, "Tasks Scores": { "Question Answering (QA)": 0.1201, "Reasoning": 0.1094, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.3771 } }, "Meta": { "Model Name": "Qwen/Qwen2.5-0.5B-Instruct", "License": "apache-2.0", "Revision": "main", "Precision": "bfloat16", "Params": 0.465, "Total Entries": 279, "Successful Entries": 278, "Failed Entries": 1, "Success Ratio": 0.9964 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.4468, "Completeness": 0.4432, "Conciseness": 0.1278, "Helpfulness": 0.4179, "Honesty": 0.4271, "Harmlessness": 0.4459, "3C3H Score": 0.3848 }, "Tasks Scores": { "Question Answering (QA)": 0.3684, "Reasoning": 0.4983, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.6812 } }, "Meta": { "Model Name": "Qwen/Qwen2.5-3B-Instruct", "License": "apache-2.0", "Revision": "main", "Precision": "bfloat16", "Params": 3.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.7192, "Completeness": 0.718, "Conciseness": 0.1906, "Helpfulness": 0.6986, "Honesty": 0.7094, "Harmlessness": 0.7192, "3C3H Score": 0.6258 }, "Tasks Scores": { "Question Answering (QA)": 0.6677, "Reasoning": 0.7594, "Orthographic and Grammatical Analysis": 0.1075, "Safety": 0.6083 } }, "Meta": { "Model Name": "Qwen/Qwen2.5-72B-Instruct", "License": "qwen", "Revision": "main", "Precision": "bfloat16", "Params": 72.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.6499, "Completeness": 0.6487, "Conciseness": 0.2016, "Helpfulness": 0.6386, "Honesty": 0.638, "Harmlessness": 0.6499, "3C3H Score": 0.5711 }, "Tasks Scores": { "Question Answering (QA)": 0.6395, "Reasoning": 0.6122, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.7792 } }, "Meta": { "Model Name": "google/gemma-2-27b-it", "License": "gemma", "Revision": "main", "Precision": "bfloat16", "Params": 27.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.589, "Completeness": 0.589, "Conciseness": 0.1834, "Helpfulness": 0.5797, "Honesty": 0.5744, "Harmlessness": 0.589, "3C3H Score": 0.5174 }, "Tasks Scores": { "Question Answering (QA)": 0.5462, "Reasoning": 0.6011, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.7854 } }, "Meta": { "Model Name": "google/gemma-2-9b-it", "License": "gemma", "Revision": "main", "Precision": "bfloat16", "Params": 9.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.5579, "Completeness": 0.5544, "Conciseness": 0.1682, "Helpfulness": 0.5352, "Honesty": 0.5436, "Harmlessness": 0.5579, "3C3H Score": 0.4862 }, "Tasks Scores": { "Question Answering (QA)": 0.5925, "Reasoning": 0.48, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.45 } }, "Meta": { "Model Name": "inceptionai/jais-adapted-13b-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 13.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.6679, "Completeness": 0.6655, "Conciseness": 0.1804, "Helpfulness": 0.6326, "Honesty": 0.652, "Harmlessness": 0.6679, "3C3H Score": 0.5777 }, "Tasks Scores": { "Question Answering (QA)": 0.6864, "Reasoning": 0.5711, "Orthographic and Grammatical Analysis": 0.0578, "Safety": 0.5771 } }, "Meta": { "Model Name": "inceptionai/jais-adapted-70b-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 70.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.5211, "Completeness": 0.5102, "Conciseness": 0.1339, "Helpfulness": 0.4798, "Honesty": 0.5093, "Harmlessness": 0.5202, "3C3H Score": 0.4457 }, "Tasks Scores": { "Question Answering (QA)": 0.5144, "Reasoning": 0.4844, "Orthographic and Grammatical Analysis": 0.0269, "Safety": 0.4312 } }, "Meta": { "Model Name": "inceptionai/jais-family-13b-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 13.0, "Total Entries": 279, "Successful Entries": 277, "Failed Entries": 2, "Success Ratio": 0.9928 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3729, "Completeness": 0.3669, "Conciseness": 0.0887, "Helpfulness": 0.3441, "Honesty": 0.3543, "Harmlessness": 0.3711, "3C3H Score": 0.3163 }, "Tasks Scores": { "Question Answering (QA)": 0.348, "Reasoning": 0.3761, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.3417 } }, "Meta": { "Model Name": "inceptionai/jais-family-2p7b-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 3.0, "Total Entries": 279, "Successful Entries": 278, "Failed Entries": 1, "Success Ratio": 0.9964 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.5806, "Completeness": 0.5759, "Conciseness": 0.1526, "Helpfulness": 0.5475, "Honesty": 0.5621, "Harmlessness": 0.5806, "3C3H Score": 0.4999 }, "Tasks Scores": { "Question Answering (QA)": 0.5812, "Reasoning": 0.5239, "Orthographic and Grammatical Analysis": 0.0282, "Safety": 0.5187 } }, "Meta": { "Model Name": "inceptionai/jais-family-30b-8k-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 30.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.4755, "Completeness": 0.4731, "Conciseness": 0.1243, "Helpfulness": 0.4522, "Honesty": 0.4597, "Harmlessness": 0.4755, "3C3H Score": 0.41 }, "Tasks Scores": { "Question Answering (QA)": 0.4743, "Reasoning": 0.4633, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.3542 } }, "Meta": { "Model Name": "inceptionai/jais-family-6p7b-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 7.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.6392, "Completeness": 0.6129, "Conciseness": 0.27, "Helpfulness": 0.6016, "Honesty": 0.6171, "Harmlessness": 0.6383, "3C3H Score": 0.5632 }, "Tasks Scores": { "Question Answering (QA)": 0.6465, "Reasoning": 0.6283, "Orthographic and Grammatical Analysis": 0.0591, "Safety": 0.4625 } }, "Meta": { "Model Name": "meta-llama/Llama-3.1-70B-Instruct", "License": "llama3.1", "Revision": "main", "Precision": "bfloat16", "Params": 70.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.4421, "Completeness": 0.4409, "Conciseness": 0.1416, "Helpfulness": 0.3967, "Honesty": 0.4065, "Harmlessness": 0.4421, "3C3H Score": 0.3783 }, "Tasks Scores": { "Question Answering (QA)": 0.3826, "Reasoning": 0.45, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.6625 } }, "Meta": { "Model Name": "meta-llama/Llama-3.1-8B-Instruct", "License": "llama3.1", "Revision": "main", "Precision": "bfloat16", "Params": 8.0, "Total Entries": 279, "Successful Entries": 279, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.2359, "Completeness": 0.2058, "Conciseness": 0.0581, "Helpfulness": 0.1781, "Honesty": 0.2106, "Harmlessness": 0.2341, "3C3H Score": 0.1871 }, "Tasks Scores": { "Question Answering (QA)": 0.198, "Reasoning": 0.2328, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.2229 } }, "Meta": { "Model Name": "meta-llama/Meta-Llama-3-8B-Instruct", "License": "llama3", "Revision": "main", "Precision": "bfloat16", "Params": 14.963, "Total Entries": 279, "Successful Entries": 277, "Failed Entries": 2, "Success Ratio": 0.9928 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.5204, "Completeness": 0.1295, "Conciseness": 0.4149, "Helpfulness": 0.2332, "Honesty": 0.4814, "Harmlessness": 0.5204, "3C3H Score": 0.3833 }, "Tasks Scores": { "Question Answering (QA)": 0.4053, "Reasoning": 0.3806, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.8188 } }, "Meta": { "Model Name": "silma-ai/SILMA-9B-Instruct-v1.0", "License": "gemma", "Revision": "main", "Precision": "bfloat16", "Params": 9.0, "Total Entries": 279, "Successful Entries": 278, "Failed Entries": 1, "Success Ratio": 0.9964 } } ]