Spaces:
Running
Running
[ | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.7026, | |
"Completeness": 0.7014, | |
"Conciseness": 0.1631, | |
"Helpfulness": 0.6784, | |
"Honesty": 0.6972, | |
"Harmlessness": 0.7026, | |
"3C3H Score": 0.6076 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.7151, | |
"Reasoning": 0.64, | |
"Orthographic and Grammatical Analysis": 0.0887, | |
"Safety": 0.4729 | |
} | |
}, | |
"Meta": { | |
"Model Name": "CohereForAI/aya-expanse-32b", | |
"License": "cc-by-nc-4.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 32.0, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5612, | |
"Completeness": 0.5612, | |
"Conciseness": 0.1172, | |
"Helpfulness": 0.5468, | |
"Honesty": 0.5519, | |
"Harmlessness": 0.5594, | |
"3C3H Score": 0.4829 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5526, | |
"Reasoning": 0.5561, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.4271 | |
} | |
}, | |
"Meta": { | |
"Model Name": "CohereForAI/aya-expanse-8b", | |
"License": "cc-by-nc-4.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 8.0, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.4648, | |
"Completeness": 0.46, | |
"Conciseness": 0.1251, | |
"Helpfulness": 0.4415, | |
"Honesty": 0.4495, | |
"Harmlessness": 0.4639, | |
"3C3H Score": 0.4008 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5056, | |
"Reasoning": 0.3817, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.2917 | |
} | |
}, | |
"Meta": { | |
"Model Name": "FreedomIntelligence/AceGPT-13B-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 13.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.4158, | |
"Completeness": 0.4158, | |
"Conciseness": 0.0941, | |
"Helpfulness": 0.3817, | |
"Honesty": 0.3934, | |
"Harmlessness": 0.4158, | |
"3C3H Score": 0.3527 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.4017, | |
"Reasoning": 0.4367, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.2104 | |
} | |
}, | |
"Meta": { | |
"Model Name": "FreedomIntelligence/AceGPT-7B-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 7.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5568, | |
"Completeness": 0.546, | |
"Conciseness": 0.2094, | |
"Helpfulness": 0.5302, | |
"Honesty": 0.5391, | |
"Harmlessness": 0.5568, | |
"3C3H Score": 0.4897 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6084, | |
"Reasoning": 0.4717, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.4083 | |
} | |
}, | |
"Meta": { | |
"Model Name": "FreedomIntelligence/AceGPT-v2-8B-Chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 8.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.1547, | |
"Completeness": 0.1439, | |
"Conciseness": 0.0369, | |
"Helpfulness": 0.116, | |
"Honesty": 0.1286, | |
"Harmlessness": 0.1538, | |
"3C3H Score": 0.1223 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.1201, | |
"Reasoning": 0.1094, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.3771 | |
} | |
}, | |
"Meta": { | |
"Model Name": "Qwen/Qwen2.5-0.5B-Instruct", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 0.465, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.4468, | |
"Completeness": 0.4432, | |
"Conciseness": 0.1278, | |
"Helpfulness": 0.4179, | |
"Honesty": 0.4271, | |
"Harmlessness": 0.4459, | |
"3C3H Score": 0.3848 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.3684, | |
"Reasoning": 0.4983, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.6812 | |
} | |
}, | |
"Meta": { | |
"Model Name": "Qwen/Qwen2.5-3B-Instruct", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 3.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.7192, | |
"Completeness": 0.718, | |
"Conciseness": 0.1906, | |
"Helpfulness": 0.6986, | |
"Honesty": 0.7094, | |
"Harmlessness": 0.7192, | |
"3C3H Score": 0.6258 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6677, | |
"Reasoning": 0.7594, | |
"Orthographic and Grammatical Analysis": 0.1075, | |
"Safety": 0.6083 | |
} | |
}, | |
"Meta": { | |
"Model Name": "Qwen/Qwen2.5-72B-Instruct", | |
"License": "qwen", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 72.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6499, | |
"Completeness": 0.6487, | |
"Conciseness": 0.2016, | |
"Helpfulness": 0.6386, | |
"Honesty": 0.638, | |
"Harmlessness": 0.6499, | |
"3C3H Score": 0.5711 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6395, | |
"Reasoning": 0.6122, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.7792 | |
} | |
}, | |
"Meta": { | |
"Model Name": "google/gemma-2-27b-it", | |
"License": "gemma", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 27.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.589, | |
"Completeness": 0.589, | |
"Conciseness": 0.1834, | |
"Helpfulness": 0.5797, | |
"Honesty": 0.5744, | |
"Harmlessness": 0.589, | |
"3C3H Score": 0.5174 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5462, | |
"Reasoning": 0.6011, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.7854 | |
} | |
}, | |
"Meta": { | |
"Model Name": "google/gemma-2-9b-it", | |
"License": "gemma", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 9.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5579, | |
"Completeness": 0.5544, | |
"Conciseness": 0.1682, | |
"Helpfulness": 0.5352, | |
"Honesty": 0.5436, | |
"Harmlessness": 0.5579, | |
"3C3H Score": 0.4862 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5925, | |
"Reasoning": 0.48, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.45 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-adapted-13b-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 13.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6679, | |
"Completeness": 0.6655, | |
"Conciseness": 0.1804, | |
"Helpfulness": 0.6326, | |
"Honesty": 0.652, | |
"Harmlessness": 0.6679, | |
"3C3H Score": 0.5777 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6864, | |
"Reasoning": 0.5711, | |
"Orthographic and Grammatical Analysis": 0.0578, | |
"Safety": 0.5771 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-adapted-70b-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 70.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5211, | |
"Completeness": 0.5102, | |
"Conciseness": 0.1339, | |
"Helpfulness": 0.4798, | |
"Honesty": 0.5093, | |
"Harmlessness": 0.5202, | |
"3C3H Score": 0.4457 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5144, | |
"Reasoning": 0.4844, | |
"Orthographic and Grammatical Analysis": 0.0269, | |
"Safety": 0.4312 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-family-13b-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 13.0, | |
"Total Entries": 279, | |
"Successful Entries": 277, | |
"Failed Entries": 2, | |
"Success Ratio": 0.9928 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.3729, | |
"Completeness": 0.3669, | |
"Conciseness": 0.0887, | |
"Helpfulness": 0.3441, | |
"Honesty": 0.3543, | |
"Harmlessness": 0.3711, | |
"3C3H Score": 0.3163 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.348, | |
"Reasoning": 0.3761, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.3417 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-family-2p7b-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 3.0, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5806, | |
"Completeness": 0.5759, | |
"Conciseness": 0.1526, | |
"Helpfulness": 0.5475, | |
"Honesty": 0.5621, | |
"Harmlessness": 0.5806, | |
"3C3H Score": 0.4999 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5812, | |
"Reasoning": 0.5239, | |
"Orthographic and Grammatical Analysis": 0.0282, | |
"Safety": 0.5187 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-family-30b-8k-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 30.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.4755, | |
"Completeness": 0.4731, | |
"Conciseness": 0.1243, | |
"Helpfulness": 0.4522, | |
"Honesty": 0.4597, | |
"Harmlessness": 0.4755, | |
"3C3H Score": 0.41 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.4743, | |
"Reasoning": 0.4633, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.3542 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-family-6p7b-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 7.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6392, | |
"Completeness": 0.6129, | |
"Conciseness": 0.27, | |
"Helpfulness": 0.6016, | |
"Honesty": 0.6171, | |
"Harmlessness": 0.6383, | |
"3C3H Score": 0.5632 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6465, | |
"Reasoning": 0.6283, | |
"Orthographic and Grammatical Analysis": 0.0591, | |
"Safety": 0.4625 | |
} | |
}, | |
"Meta": { | |
"Model Name": "meta-llama/Llama-3.1-70B-Instruct", | |
"License": "llama3.1", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 70.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.4421, | |
"Completeness": 0.4409, | |
"Conciseness": 0.1416, | |
"Helpfulness": 0.3967, | |
"Honesty": 0.4065, | |
"Harmlessness": 0.4421, | |
"3C3H Score": 0.3783 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.3826, | |
"Reasoning": 0.45, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.6625 | |
} | |
}, | |
"Meta": { | |
"Model Name": "meta-llama/Llama-3.1-8B-Instruct", | |
"License": "llama3.1", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 8.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.2359, | |
"Completeness": 0.2058, | |
"Conciseness": 0.0581, | |
"Helpfulness": 0.1781, | |
"Honesty": 0.2106, | |
"Harmlessness": 0.2341, | |
"3C3H Score": 0.1871 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.198, | |
"Reasoning": 0.2328, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.2229 | |
} | |
}, | |
"Meta": { | |
"Model Name": "meta-llama/Meta-Llama-3-8B-Instruct", | |
"License": "llama3", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 14.963, | |
"Total Entries": 279, | |
"Successful Entries": 277, | |
"Failed Entries": 2, | |
"Success Ratio": 0.9928 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5204, | |
"Completeness": 0.1295, | |
"Conciseness": 0.4149, | |
"Helpfulness": 0.2332, | |
"Honesty": 0.4814, | |
"Harmlessness": 0.5204, | |
"3C3H Score": 0.3833 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.4053, | |
"Reasoning": 0.3806, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.8188 | |
} | |
}, | |
"Meta": { | |
"Model Name": "silma-ai/SILMA-9B-Instruct-v1.0", | |
"License": "gemma", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 9.0, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
} | |
] |