ruochenzhao committed on
Commit
7c43ac7
·
1 Parent(s): 33d87ee

fixed index mistake

Browse files
src/results/auto-arena-llms-results-20240615.csv CHANGED
@@ -4,16 +4,16 @@ Model,Rank,MT-Bench Hard,MT-Bench,LC-AlpacaEval,openLLM,MMLU,From,Open?,Params(B
4
  [meta-llama/Llama-3-70b-chat-hf](https://ai.meta.com/blog/meta-llama-3/),3,41.1,,34.4,77.88,80.06,meta,Yes,70B,-,1079.705,[meta-llama/Llama-3-70b-chat-hf](https://ai.meta.com/blog/meta-llama-3/),meta,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5
  [qwen2-72B-instruct](https://qwenlm.github.io/blog/qwen2/),4,48.1,9.12,,,84.2,Alibaba,Yes,72B,-,1077.809,[千问qwen2-72B-instruct](https://qwenlm.github.io/blog/qwen2/),阿里巴巴,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6
  [minimax-abab6.5-chat](https://platform.minimaxi.com/),5,,,,,78.7,minimax,No,-,4.2,1071.106,[minimax-abab6.5-chat](https://platform.minimaxi.com/),minimax,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7
- [glm-4](https://open.bigmodel.cn/trialcenter?modelCode=glm-4),5,,,,,81.5,Zhipu AI,No,-,13.8,1062.398,[glm-4](https://open.bigmodel.cn/trialcenter?modelCode=glm-4),智谱,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8
- [command-r-plus](https://huggingface.co/CohereForAI/c4ai-command-r-plus),6,33.1,,,74.62,75.7,Cohere,Yes,104B,15,1043.217,[command-r-plus](https://huggingface.co/CohereForAI/c4ai-command-r-plus),Cohere,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9
- [claude-3-haiku-20240307](https://www.anthropic.com/api),7,41.5,9.1,,84.8,75.2,Anthropic,No,-,1.25,1028.622,[claude-3-haiku-20240307](https://www.anthropic.com/api),Anthropic,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
10
- [reka-core-20240501](https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model),8,,,,,83.2,Reka AI,No,-,25,1016.344,[reka-core-20240501](https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model),Reka AI,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
11
- [Qwen1.5-72B-chat](https://huggingface.co/Qwen/Qwen1.5-72B),9,36.1,8.61,36.6,72.91,77.2,Alibaba,Yes,72B,-,1013.886,[千问Qwen1.5-72B-chat](https://huggingface.co/Qwen/Qwen1.5-72B),阿里巴巴,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
12
- [SenseChat-5](https://console.sensecore.cn/nova/home),10,,,,,84.7,SenseTime,No,-,13.8,1001.696,[日日新SenseChat-5](https://console.sensecore.cn/nova/home),商汤科技,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
13
- [Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1),11,23.4,8.3,23.7,72.71,71.4,Mistral AI,Yes,7B,-,950.769,[Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1),Mistral AI,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
14
- [wenxin-4](https://yiyan.baidu.com/),12,,,,,,Baidu,No,-,16.6,945.233,[文心一言wenxin-4](https://yiyan.baidu.com/),百度,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
15
- [zero-one-ai/Yi-34B-Chat](https://huggingface.co/01-ai/Yi-34B-Chat),13,23.1,7,27.2,63.17,74.87,Zero One AI,Yes,34B,-,935.520,[zero-one-ai/Yi-34B-Chat](https://huggingface.co/01-ai/Yi-34B-Chat),零一万物,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
16
- [mistral-large-2402](https://mistral.ai/news/mistral-large/),14,37.7,8.63,32.7,,81.2,Mistral AI,No,-,12,919.873,[mistral-large-2402](https://mistral.ai/news/mistral-large/),Mistral AI,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
17
- [GPT-3.5-Turbo-0125](https://openai.com/index/new-embedding-models-and-api-updates/),15,23.3,7.94,17.7,71.02,70,OpenAI,No,-,1.5,880.908,[GPT-3.5-Turbo-0125](https://openai.com/index/new-embedding-models-and-api-updates/),OpenAI,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
18
- [deepseek-ai/deepseek-llm-67b-chat](https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat),16,,,17.8,,71.3,Deepseek AI,Yes,67B,-,832.252,[deepseek-ai/deepseek-llm-67b-chat](https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat),深度求索,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
19
- [Llama-2-70b-chat](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf),17,11.6,6.86,14.7,62.4,63.91,Meta,Yes,70B,-,804.969,[Llama-2-70b-chat](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf),Meta,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
 
4
  [meta-llama/Llama-3-70b-chat-hf](https://ai.meta.com/blog/meta-llama-3/),3,41.1,,34.4,77.88,80.06,meta,Yes,70B,-,1079.705,[meta-llama/Llama-3-70b-chat-hf](https://ai.meta.com/blog/meta-llama-3/),meta,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5
  [qwen2-72B-instruct](https://qwenlm.github.io/blog/qwen2/),4,48.1,9.12,,,84.2,Alibaba,Yes,72B,-,1077.809,[千问qwen2-72B-instruct](https://qwenlm.github.io/blog/qwen2/),阿里巴巴,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6
  [minimax-abab6.5-chat](https://platform.minimaxi.com/),5,,,,,78.7,minimax,No,-,4.2,1071.106,[minimax-abab6.5-chat](https://platform.minimaxi.com/),minimax,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7
+ [glm-4](https://open.bigmodel.cn/trialcenter?modelCode=glm-4),6,,,,,81.5,Zhipu AI,No,-,13.8,1062.398,[glm-4](https://open.bigmodel.cn/trialcenter?modelCode=glm-4),智谱,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8
+ [command-r-plus](https://huggingface.co/CohereForAI/c4ai-command-r-plus),7,33.1,,,74.62,75.7,Cohere,Yes,104B,15,1043.217,[command-r-plus](https://huggingface.co/CohereForAI/c4ai-command-r-plus),Cohere,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9
+ [claude-3-haiku-20240307](https://www.anthropic.com/api),8,41.5,9.1,,84.8,75.2,Anthropic,No,-,1.25,1028.622,[claude-3-haiku-20240307](https://www.anthropic.com/api),Anthropic,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
10
+ [reka-core-20240501](https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model),9,,,,,83.2,Reka AI,No,-,25,1016.344,[reka-core-20240501](https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model),Reka AI,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
11
+ [Qwen1.5-72B-chat](https://huggingface.co/Qwen/Qwen1.5-72B),10,36.1,8.61,36.6,72.91,77.2,Alibaba,Yes,72B,-,1013.886,[千问Qwen1.5-72B-chat](https://huggingface.co/Qwen/Qwen1.5-72B),阿里巴巴,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
12
+ [SenseChat-5](https://console.sensecore.cn/nova/home),11,,,,,84.7,SenseTime,No,-,13.8,1001.696,[日日新SenseChat-5](https://console.sensecore.cn/nova/home),商汤科技,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
13
+ [Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1),12,23.4,8.3,23.7,72.71,71.4,Mistral AI,Yes,7B,-,950.769,[Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1),Mistral AI,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
14
+ [wenxin-4](https://yiyan.baidu.com/),13,,,,,,Baidu,No,-,16.6,945.233,[文心一言wenxin-4](https://yiyan.baidu.com/),百度,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
15
+ [zero-one-ai/Yi-34B-Chat](https://huggingface.co/01-ai/Yi-34B-Chat),14,23.1,7,27.2,63.17,74.87,Zero One AI,Yes,34B,-,935.520,[zero-one-ai/Yi-34B-Chat](https://huggingface.co/01-ai/Yi-34B-Chat),零一万物,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
16
+ [mistral-large-2402](https://mistral.ai/news/mistral-large/),15,37.7,8.63,32.7,,81.2,Mistral AI,No,-,12,919.873,[mistral-large-2402](https://mistral.ai/news/mistral-large/),Mistral AI,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
17
+ [GPT-3.5-Turbo-0125](https://openai.com/index/new-embedding-models-and-api-updates/),16,23.3,7.94,17.7,71.02,70,OpenAI,No,-,1.5,880.908,[GPT-3.5-Turbo-0125](https://openai.com/index/new-embedding-models-and-api-updates/),OpenAI,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
18
+ [deepseek-ai/deepseek-llm-67b-chat](https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat),17,,,17.8,,71.3,Deepseek AI,Yes,67B,-,832.252,[deepseek-ai/deepseek-llm-67b-chat](https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat),深度求索,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
19
+ [Llama-2-70b-chat](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf),18,11.6,6.86,14.7,62.4,63.91,Meta,Yes,70B,-,804.969,[Llama-2-70b-chat](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf),Meta,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,