WildBench-V1-legacy / model_len_info.json
yuchenlin's picture
remove winrates and update the length penalty method
960f5ed
raw
history blame
2.25 kB
{
"Llama-2-13b-chat-hf.nosp": {
"avg_len": 2943.346238938053,
"empty_output": 120,
"num_samples": 1024
},
"Llama-2-70b-chat-hf.nosp": {
"avg_len": 3077.0840707964603,
"empty_output": 120,
"num_samples": 1024
},
"Llama-2-7b-chat-hf.nosp": {
"avg_len": 2965.4059734513276,
"empty_output": 120,
"num_samples": 1024
},
"Llama-2-7b-chat-hf": {
"avg_len": 2137.34,
"empty_output": 124,
"num_samples": 1024
},
"Mistral-7B-Instruct-v0.1": {
"avg_len": 2208.8115234375,
"empty_output": 0,
"num_samples": 1024
},
"Mistral-7B-Instruct-v0.2": {
"avg_len": 2852.33203125,
"empty_output": 0,
"num_samples": 1024
},
"Mixtral-8x7B-Instruct-v0.1": {
"avg_len": 2483.9638671875,
"empty_output": 0,
"num_samples": 1024
},
"Nous-Hermes-2-Mixtral-8x7B-DPO": {
"avg_len": 2878.79296875,
"empty_output": 0,
"num_samples": 1024
},
"Yi-34B-Chat": {
"avg_len": 2899.1797133406835,
"empty_output": 117,
"num_samples": 1024
},
"gemini-1.0-pro": {
"avg_len": 2407.559462254395,
"empty_output": 57,
"num_samples": 1024
},
"gemma-7b-it": {
"avg_len": 1960.829244357213,
"empty_output": 5,
"num_samples": 1024
},
"gpt-3.5-turbo-0125": {
"avg_len": 1725.7216796875,
"empty_output": 0,
"num_samples": 1024
},
"gpt-4-0125-preview": {
"avg_len": 3190.716796875,
"empty_output": 0,
"num_samples": 1024
},
"tulu-2-dpo-70b": {
"avg_len": 2630.2337917485265,
"empty_output": 6,
"num_samples": 1024
},
"vicuna-13b-v1.5": {
"avg_len": 1864.2749445676275,
"empty_output": 122,
"num_samples": 1024
},
"zephyr-7b-beta": {
"avg_len": 3011.2529296875,
"empty_output": 0,
"num_samples": 1024
},
"mistral-large-2402": {
"avg_len": 2352.189453125,
"empty_output": 0,
"num_samples": 1024
},
"claude-3-opus-20240229": {
"avg_len": 2460.330078125,
"empty_output": 0,
"num_samples": 1024
},
"claude-3-sonnet-20240229": {
"avg_len": 2456.21484375,
"empty_output": 0,
"num_samples": 1024
},
"zephyr-7b-gemma-v0.1": {
"avg_len": 2551.9842983316976,
"empty_output": 5,
"num_samples": 1024
}
}