Spaces:
Running
Running
[ | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.7026, | |
"Completeness": 0.7014, | |
"Conciseness": 0.1631, | |
"Helpfulness": 0.6784, | |
"Honesty": 0.6972, | |
"Harmlessness": 0.7026, | |
"3C3H Score": 0.6076 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.7151, | |
"Reasoning": 0.64, | |
"Orthographic and Grammatical Analysis": 0.0887, | |
"Safety": 0.4729 | |
} | |
}, | |
"Meta": { | |
"Model Name": "CohereForAI/aya-expanse-32b", | |
"License": "cc-by-nc-4.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 32.0, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5612, | |
"Completeness": 0.5612, | |
"Conciseness": 0.1172, | |
"Helpfulness": 0.5468, | |
"Honesty": 0.5519, | |
"Harmlessness": 0.5594, | |
"3C3H Score": 0.4829 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5526, | |
"Reasoning": 0.5561, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.4271 | |
} | |
}, | |
"Meta": { | |
"Model Name": "CohereForAI/aya-expanse-8b", | |
"License": "cc-by-nc-4.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 8.0, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.4648, | |
"Completeness": 0.46, | |
"Conciseness": 0.1251, | |
"Helpfulness": 0.4415, | |
"Honesty": 0.4495, | |
"Harmlessness": 0.4639, | |
"3C3H Score": 0.4008 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5056, | |
"Reasoning": 0.3817, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.2917 | |
} | |
}, | |
"Meta": { | |
"Model Name": "FreedomIntelligence/AceGPT-13B-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 13.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.4158, | |
"Completeness": 0.4158, | |
"Conciseness": 0.0941, | |
"Helpfulness": 0.3817, | |
"Honesty": 0.3934, | |
"Harmlessness": 0.4158, | |
"3C3H Score": 0.3527 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.4017, | |
"Reasoning": 0.4367, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.2104 | |
} | |
}, | |
"Meta": { | |
"Model Name": "FreedomIntelligence/AceGPT-7B-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 7.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5568, | |
"Completeness": 0.546, | |
"Conciseness": 0.2094, | |
"Helpfulness": 0.5302, | |
"Honesty": 0.5391, | |
"Harmlessness": 0.5568, | |
"3C3H Score": 0.4897 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6084, | |
"Reasoning": 0.4717, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.4083 | |
} | |
}, | |
"Meta": { | |
"Model Name": "FreedomIntelligence/AceGPT-v2-8B-Chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 8.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.1547, | |
"Completeness": 0.1439, | |
"Conciseness": 0.0369, | |
"Helpfulness": 0.116, | |
"Honesty": 0.1286, | |
"Harmlessness": 0.1538, | |
"3C3H Score": 0.1223 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.1201, | |
"Reasoning": 0.1094, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.3771 | |
} | |
}, | |
"Meta": { | |
"Model Name": "Qwen/Qwen2.5-0.5B-Instruct", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 0.465, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.4468, | |
"Completeness": 0.4432, | |
"Conciseness": 0.1278, | |
"Helpfulness": 0.4179, | |
"Honesty": 0.4271, | |
"Harmlessness": 0.4459, | |
"3C3H Score": 0.3848 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.3684, | |
"Reasoning": 0.4983, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.6812 | |
} | |
}, | |
"Meta": { | |
"Model Name": "Qwen/Qwen2.5-3B-Instruct", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 3.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.7192, | |
"Completeness": 0.718, | |
"Conciseness": 0.1906, | |
"Helpfulness": 0.6986, | |
"Honesty": 0.7094, | |
"Harmlessness": 0.7192, | |
"3C3H Score": 0.6258 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6677, | |
"Reasoning": 0.7594, | |
"Orthographic and Grammatical Analysis": 0.1075, | |
"Safety": 0.6083 | |
} | |
}, | |
"Meta": { | |
"Model Name": "Qwen/Qwen2.5-72B-Instruct", | |
"License": "qwen", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 72.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6499, | |
"Completeness": 0.6487, | |
"Conciseness": 0.2016, | |
"Helpfulness": 0.6386, | |
"Honesty": 0.638, | |
"Harmlessness": 0.6499, | |
"3C3H Score": 0.5711 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6395, | |
"Reasoning": 0.6122, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.7792 | |
} | |
}, | |
"Meta": { | |
"Model Name": "google/gemma-2-27b-it", | |
"License": "gemma", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 27.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.589, | |
"Completeness": 0.589, | |
"Conciseness": 0.1834, | |
"Helpfulness": 0.5797, | |
"Honesty": 0.5744, | |
"Harmlessness": 0.589, | |
"3C3H Score": 0.5174 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5462, | |
"Reasoning": 0.6011, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.7854 | |
} | |
}, | |
"Meta": { | |
"Model Name": "google/gemma-2-9b-it", | |
"License": "gemma", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 9.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5579, | |
"Completeness": 0.5544, | |
"Conciseness": 0.1682, | |
"Helpfulness": 0.5352, | |
"Honesty": 0.5436, | |
"Harmlessness": 0.5579, | |
"3C3H Score": 0.4862 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5925, | |
"Reasoning": 0.48, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.45 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-adapted-13b-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 13.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6679, | |
"Completeness": 0.6655, | |
"Conciseness": 0.1804, | |
"Helpfulness": 0.6326, | |
"Honesty": 0.652, | |
"Harmlessness": 0.6679, | |
"3C3H Score": 0.5777 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6864, | |
"Reasoning": 0.5711, | |
"Orthographic and Grammatical Analysis": 0.0578, | |
"Safety": 0.5771 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-adapted-70b-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 70.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5211, | |
"Completeness": 0.5102, | |
"Conciseness": 0.1339, | |
"Helpfulness": 0.4798, | |
"Honesty": 0.5093, | |
"Harmlessness": 0.5202, | |
"3C3H Score": 0.4457 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5144, | |
"Reasoning": 0.4844, | |
"Orthographic and Grammatical Analysis": 0.0269, | |
"Safety": 0.4312 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-family-13b-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 13.0, | |
"Total Entries": 279, | |
"Successful Entries": 277, | |
"Failed Entries": 2, | |
"Success Ratio": 0.9928 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.3729, | |
"Completeness": 0.3669, | |
"Conciseness": 0.0887, | |
"Helpfulness": 0.3441, | |
"Honesty": 0.3543, | |
"Harmlessness": 0.3711, | |
"3C3H Score": 0.3163 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.348, | |
"Reasoning": 0.3761, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.3417 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-family-2p7b-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 3.0, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5806, | |
"Completeness": 0.5759, | |
"Conciseness": 0.1526, | |
"Helpfulness": 0.5475, | |
"Honesty": 0.5621, | |
"Harmlessness": 0.5806, | |
"3C3H Score": 0.4999 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5812, | |
"Reasoning": 0.5239, | |
"Orthographic and Grammatical Analysis": 0.0282, | |
"Safety": 0.5187 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-family-30b-8k-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 30.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.4755, | |
"Completeness": 0.4731, | |
"Conciseness": 0.1243, | |
"Helpfulness": 0.4522, | |
"Honesty": 0.4597, | |
"Harmlessness": 0.4755, | |
"3C3H Score": 0.41 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.4743, | |
"Reasoning": 0.4633, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.3542 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-family-6p7b-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 7.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6392, | |
"Completeness": 0.6129, | |
"Conciseness": 0.27, | |
"Helpfulness": 0.6016, | |
"Honesty": 0.6171, | |
"Harmlessness": 0.6383, | |
"3C3H Score": 0.5632 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6465, | |
"Reasoning": 0.6283, | |
"Orthographic and Grammatical Analysis": 0.0591, | |
"Safety": 0.4625 | |
} | |
}, | |
"Meta": { | |
"Model Name": "meta-llama/Llama-3.1-70B-Instruct", | |
"License": "llama3.1", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 70.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.4421, | |
"Completeness": 0.4409, | |
"Conciseness": 0.1416, | |
"Helpfulness": 0.3967, | |
"Honesty": 0.4065, | |
"Harmlessness": 0.4421, | |
"3C3H Score": 0.3783 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.3826, | |
"Reasoning": 0.45, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.6625 | |
} | |
}, | |
"Meta": { | |
"Model Name": "meta-llama/Llama-3.1-8B-Instruct", | |
"License": "llama3.1", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 8.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.2359, | |
"Completeness": 0.2058, | |
"Conciseness": 0.0581, | |
"Helpfulness": 0.1781, | |
"Honesty": 0.2106, | |
"Harmlessness": 0.2341, | |
"3C3H Score": 0.1871 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.198, | |
"Reasoning": 0.2328, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.2229 | |
} | |
}, | |
"Meta": { | |
"Model Name": "meta-llama/Meta-Llama-3-8B-Instruct", | |
"License": "llama3", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 14.963, | |
"Total Entries": 279, | |
"Successful Entries": 277, | |
"Failed Entries": 2, | |
"Success Ratio": 0.9928 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5204, | |
"Completeness": 0.1295, | |
"Conciseness": 0.4149, | |
"Helpfulness": 0.2332, | |
"Honesty": 0.4814, | |
"Harmlessness": 0.5204, | |
"3C3H Score": 0.3833 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.4053, | |
"Reasoning": 0.3806, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.8188 | |
} | |
}, | |
"Meta": { | |
"Model Name": "silma-ai/SILMA-9B-Instruct-v1.0", | |
"License": "gemma", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 9.0, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.542, | |
"Completeness": 0.5156, | |
"Conciseness": 0.2512, | |
"Helpfulness": 0.5033, | |
"Honesty": 0.533, | |
"Harmlessness": 0.542, | |
"3C3H Score": 0.4812 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6009, | |
"Reasoning": 0.4825, | |
"Orthographic and Grammatical Analysis": 0.0309, | |
"Safety": 0.2583 | |
} | |
}, | |
"Meta": { | |
"Model Name": "CohereForAI/aya-23-35B", | |
"License": "cc-by-nc-4.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 35.0, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5878, | |
"Completeness": 0.5472, | |
"Conciseness": 0.1738, | |
"Helpfulness": 0.5594, | |
"Honesty": 0.5806, | |
"Harmlessness": 0.5833, | |
"3C3H Score": 0.5054 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6209, | |
"Reasoning": 0.5394, | |
"Orthographic and Grammatical Analysis": 0.0269, | |
"Safety": 0.2354 | |
} | |
}, | |
"Meta": { | |
"Model Name": "CohereForAI/c4ai-command-r-08-2024", | |
"License": "cc-by-nc-4.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 32.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6282, | |
"Completeness": 0.6221, | |
"Conciseness": 0.1733, | |
"Helpfulness": 0.5978, | |
"Honesty": 0.6119, | |
"Harmlessness": 0.6282, | |
"3C3H Score": 0.5436 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6891, | |
"Reasoning": 0.5333, | |
"Orthographic and Grammatical Analysis": 0.0264, | |
"Safety": 0.2521 | |
} | |
}, | |
"Meta": { | |
"Model Name": "CohereForAI/c4ai-command-r-v01", | |
"License": "cc-by-nc-4.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 35.0, | |
"Total Entries": 279, | |
"Successful Entries": 277, | |
"Failed Entries": 2, | |
"Success Ratio": 0.9928 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5297, | |
"Completeness": 0.4679, | |
"Conciseness": 0.2876, | |
"Helpfulness": 0.4694, | |
"Honesty": 0.5097, | |
"Harmlessness": 0.5297, | |
"3C3H Score": 0.4657 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5958, | |
"Reasoning": 0.4296, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.3171 | |
} | |
}, | |
"Meta": { | |
"Model Name": "FreedomIntelligence/AceGPT-v1.5-13B-Chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 13.0, | |
"Total Entries": 279, | |
"Successful Entries": 275, | |
"Failed Entries": 4, | |
"Success Ratio": 0.9857 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6717, | |
"Completeness": 0.6642, | |
"Conciseness": 0.2906, | |
"Helpfulness": 0.6479, | |
"Honesty": 0.6657, | |
"Harmlessness": 0.6717, | |
"3C3H Score": 0.602 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.7136, | |
"Reasoning": 0.5694, | |
"Orthographic and Grammatical Analysis": 0.0632, | |
"Safety": 0.75 | |
} | |
}, | |
"Meta": { | |
"Model Name": "FreedomIntelligence/AceGPT-v2-70B-Chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 70.0, | |
"Total Entries": 279, | |
"Successful Entries": 267, | |
"Failed Entries": 12, | |
"Success Ratio": 0.957 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.7103, | |
"Completeness": 0.7091, | |
"Conciseness": 0.1912, | |
"Helpfulness": 0.6888, | |
"Honesty": 0.7036, | |
"Harmlessness": 0.7103, | |
"3C3H Score": 0.6189 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6862, | |
"Reasoning": 0.7472, | |
"Orthographic and Grammatical Analysis": 0.0282, | |
"Safety": 0.5482 | |
} | |
}, | |
"Meta": { | |
"Model Name": "MaziyarPanahi/calme-2.2-qwen2.5-72b", | |
"License": "tongyi-qianwen", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 72.0, | |
"Total Entries": 279, | |
"Successful Entries": 275, | |
"Failed Entries": 4, | |
"Success Ratio": 0.9857 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.2848, | |
"Completeness": 0.2848, | |
"Conciseness": 0.088, | |
"Helpfulness": 0.2553, | |
"Honesty": 0.2531, | |
"Harmlessness": 0.2833, | |
"3C3H Score": 0.2416 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.2384, | |
"Reasoning": 0.2723, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.5486 | |
} | |
}, | |
"Meta": { | |
"Model Name": "Qwen/Qwen2.5-1.5B-Instruct", | |
"License": "qwen", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 1.443, | |
"Total Entries": 279, | |
"Successful Entries": 268, | |
"Failed Entries": 11, | |
"Success Ratio": 0.9606 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6146, | |
"Completeness": 0.6059, | |
"Conciseness": 0.1859, | |
"Helpfulness": 0.5914, | |
"Honesty": 0.5988, | |
"Harmlessness": 0.6146, | |
"3C3H Score": 0.5352 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.566, | |
"Reasoning": 0.6684, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.6009 | |
} | |
}, | |
"Meta": { | |
"Model Name": "Qwen/Qwen2.5-14B-Instruct", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 14.0, | |
"Total Entries": 279, | |
"Successful Entries": 269, | |
"Failed Entries": 10, | |
"Success Ratio": 0.9642 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.8831, | |
"Completeness": 0.8781, | |
"Conciseness": 0.3327, | |
"Helpfulness": 0.8697, | |
"Honesty": 0.8778, | |
"Harmlessness": 0.8831, | |
"3C3H Score": 0.7874 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.7896, | |
"Reasoning": 0.77, | |
"Orthographic and Grammatical Analysis": 0.7487, | |
"Safety": 0.9013 | |
} | |
}, | |
"Meta": { | |
"Model Name": "claude-3-5-sonnet-20241022", | |
"License": "Proprietary", | |
"Revision": "UNK", | |
"Precision": "UNK", | |
"Params": "UNK", | |
"Total Entries": 279, | |
"Successful Entries": 268, | |
"Failed Entries": 11, | |
"Success Ratio": 0.9606 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6389, | |
"Completeness": 0.6377, | |
"Conciseness": 0.1938, | |
"Helpfulness": 0.6162, | |
"Honesty": 0.6316, | |
"Harmlessness": 0.6389, | |
"3C3H Score": 0.5595 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6376, | |
"Reasoning": 0.5767, | |
"Orthographic and Grammatical Analysis": 0.0591, | |
"Safety": 0.6854 | |
} | |
}, | |
"Meta": { | |
"Model Name": "claude-3-haiku-20240307", | |
"License": "Proprietary", | |
"Revision": "UNK", | |
"Precision": "UNK", | |
"Params": "UNK", | |
"Total Entries": 279, | |
"Successful Entries": 276, | |
"Failed Entries": 3, | |
"Success Ratio": 0.9892 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.2603, | |
"Completeness": 0.2311, | |
"Conciseness": 0.0721, | |
"Helpfulness": 0.2132, | |
"Honesty": 0.2476, | |
"Harmlessness": 0.2594, | |
"3C3H Score": 0.214 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.224, | |
"Reasoning": 0.2934, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.1771 | |
} | |
}, | |
"Meta": { | |
"Model Name": "meta-llama/Meta-Llama-3-70B-Instruct", | |
"License": "llama3", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 70.0, | |
"Total Entries": 279, | |
"Successful Entries": 274, | |
"Failed Entries": 5, | |
"Success Ratio": 0.9821 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.721, | |
"Completeness": 0.7138, | |
"Conciseness": 0.2298, | |
"Helpfulness": 0.7041, | |
"Honesty": 0.7141, | |
"Harmlessness": 0.721, | |
"3C3H Score": 0.634 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6923, | |
"Reasoning": 0.7312, | |
"Orthographic and Grammatical Analysis": 0.1909, | |
"Safety": 0.5229 | |
} | |
}, | |
"Meta": { | |
"Model Name": "gpt-4o-mini", | |
"License": "Proprietary", | |
"Revision": "UNK", | |
"Precision": "UNK", | |
"Params": "UNK", | |
"Total Entries": 279, | |
"Successful Entries": 276, | |
"Failed Entries": 3, | |
"Success Ratio": 0.9892 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.8375, | |
"Completeness": 0.8291, | |
"Conciseness": 0.2894, | |
"Helpfulness": 0.8099, | |
"Honesty": 0.83, | |
"Harmlessness": 0.8375, | |
"3C3H Score": 0.7389 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.8014, | |
"Reasoning": 0.7455, | |
"Orthographic and Grammatical Analysis": 0.5027, | |
"Safety": 0.6063 | |
} | |
}, | |
"Meta": { | |
"Model Name": "gpt-4o", | |
"License": "Proprietary", | |
"Revision": "UNK", | |
"Precision": "UNK", | |
"Params": "UNK", | |
"Total Entries": 279, | |
"Successful Entries": 277, | |
"Failed Entries": 2, | |
"Success Ratio": 0.9928 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.7194, | |
"Completeness": 0.7181, | |
"Conciseness": 0.1927, | |
"Helpfulness": 0.6921, | |
"Honesty": 0.7099, | |
"Harmlessness": 0.7194, | |
"3C3H Score": 0.6253 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6611, | |
"Reasoning": 0.7922, | |
"Orthographic and Grammatical Analysis": 0.0736, | |
"Safety": 0.5741 | |
} | |
}, | |
"Meta": { | |
"Model Name": "rombodawg/Rombos-LLM-V2.5-Qwen-72b", | |
"License": "qwen", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 72.0, | |
"Total Entries": 279, | |
"Successful Entries": 272, | |
"Failed Entries": 7, | |
"Success Ratio": 0.9749 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.7121, | |
"Completeness": 0.7097, | |
"Conciseness": 0.1876, | |
"Helpfulness": 0.6882, | |
"Honesty": 0.6968, | |
"Harmlessness": 0.7121, | |
"3C3H Score": 0.6177 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6815, | |
"Reasoning": 0.7567, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.5667 | |
} | |
}, | |
"Meta": { | |
"Model Name": "MaziyarPanahi/calme-2.1-qwen2.5-72b", | |
"License": "tongyi-qianwen", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 72.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.3285, | |
"Completeness": 0.3225, | |
"Conciseness": 0.0869, | |
"Helpfulness": 0.2987, | |
"Honesty": 0.3081, | |
"Harmlessness": 0.3279, | |
"3C3H Score": 0.2788 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.2945, | |
"Reasoning": 0.3667, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.2625 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-family-1p3b-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 1.0, | |
"Total Entries": 279, | |
"Successful Entries": 277, | |
"Failed Entries": 2, | |
"Success Ratio": 0.9928 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5695, | |
"Completeness": 0.5624, | |
"Conciseness": 0.1577, | |
"Helpfulness": 0.5312, | |
"Honesty": 0.554, | |
"Harmlessness": 0.5695, | |
"3C3H Score": 0.4907 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5702, | |
"Reasoning": 0.5139, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.5604 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-family-30b-16k-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 30.0, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.1966, | |
"Completeness": 0.1535, | |
"Conciseness": 0.0285, | |
"Helpfulness": 0.1196, | |
"Honesty": 0.1643, | |
"Harmlessness": 0.1957, | |
"3C3H Score": 0.143 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.1577, | |
"Reasoning": 0.1872, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.0875 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-family-590m-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 0.719, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.0791, | |
"Completeness": 0.0504, | |
"Conciseness": 0.0216, | |
"Helpfulness": 0.0414, | |
"Honesty": 0.0549, | |
"Harmlessness": 0.0755, | |
"3C3H Score": 0.0538 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.0293, | |
"Reasoning": 0.0756, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.2417 | |
} | |
}, | |
"Meta": { | |
"Model Name": "meta-llama/Llama-3.2-1B-Instruct", | |
"License": "llama3.2", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 1.0, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.2736, | |
"Completeness": 0.2616, | |
"Conciseness": 0.0792, | |
"Helpfulness": 0.1971, | |
"Honesty": 0.2315, | |
"Harmlessness": 0.2727, | |
"3C3H Score": 0.2193 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.2133, | |
"Reasoning": 0.28, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.3771 | |
} | |
}, | |
"Meta": { | |
"Model Name": "meta-llama/Llama-3.2-3B-Instruct", | |
"License": "llama3.2", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 3.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6296, | |
"Completeness": 0.6165, | |
"Conciseness": 0.2258, | |
"Helpfulness": 0.5923, | |
"Honesty": 0.6123, | |
"Harmlessness": 0.6296, | |
"3C3H Score": 0.551 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6538, | |
"Reasoning": 0.6033, | |
"Orthographic and Grammatical Analysis": 0.0309, | |
"Safety": 0.375 | |
} | |
}, | |
"Meta": { | |
"Model Name": "meta-llama/Llama-3.2-90B-Vision-Instruct", | |
"License": "llama3.2", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 90.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6858, | |
"Completeness": 0.6511, | |
"Conciseness": 0.345, | |
"Helpfulness": 0.635, | |
"Honesty": 0.6747, | |
"Harmlessness": 0.6858, | |
"3C3H Score": 0.6129 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.7062, | |
"Reasoning": 0.6394, | |
"Orthographic and Grammatical Analysis": 0.0215, | |
"Safety": 0.7167 | |
} | |
}, | |
"Meta": { | |
"Model Name": "meta-llama/Llama-3.3-70B-Instruct", | |
"License": "llama3.3", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 70.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.3321, | |
"Completeness": 0.1434, | |
"Conciseness": 0.0403, | |
"Helpfulness": 0.1359, | |
"Honesty": 0.2631, | |
"Harmlessness": 0.3295, | |
"3C3H Score": 0.2074 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.2891, | |
"Reasoning": 0.1744, | |
"Orthographic and Grammatical Analysis": 0.0175, | |
"Safety": 0.0 | |
} | |
}, | |
"Meta": { | |
"Model Name": "stabilityai/ar-stablelm-2-chat", | |
"License": "other", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 2.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5317, | |
"Completeness": 0.4875, | |
"Conciseness": 0.1711, | |
"Helpfulness": 0.4271, | |
"Honesty": 0.4904, | |
"Harmlessness": 0.5317, | |
"3C3H Score": 0.4399 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.4885, | |
"Reasoning": 0.4211, | |
"Orthographic and Grammatical Analysis": 0.0323, | |
"Safety": 0.7708 | |
} | |
}, | |
"Meta": { | |
"Model Name": "utter-project/EuroLLM-9B-Instruct", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 9.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6619, | |
"Completeness": 0.6356, | |
"Conciseness": 0.1938, | |
"Helpfulness": 0.6353, | |
"Honesty": 0.6526, | |
"Harmlessness": 0.661, | |
"3C3H Score": 0.5734 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.7327, | |
"Reasoning": 0.5506, | |
"Orthographic and Grammatical Analysis": 0.0538, | |
"Safety": 0.2458 | |
} | |
}, | |
"Meta": { | |
"Model Name": "CohereForAI/c4ai-command-r-plus-08-2024", | |
"License": "cc-by-nc-4.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 104.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"_last_sync_timestamp": "2024-12-15T21:20:51.136159" | |
} | |
] |