Add new lb data (#1)
Browse files- Add new lb data (dcb02ce8e23780c2cf70e88647423ee07a62db1e)
Co-authored-by: Artur Kazukevich <ArturKaz@users.noreply.huggingface.co>
- elo_results_20240922.pkl +3 -0
- leaderboard_table_20240922.csv +29 -0
elo_results_20240922.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:534892d62c624ecce06b73d987981d8d0fa60f47b6e0fb32b629e192753ea50e
|
3 |
+
size 1100182
|
leaderboard_table_20240922.csv
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
rating,variance,rating_q975,rating_q025,num_battles,final_ranking,key,Model,License,Organization,Knowledge cutoff date,Link,MT-bench (score),MMLU
|
2 |
+
1032.510104410189,69.30141080455054,1048.814538709025,1016.6622546106759,1284,5,gpt-4o-2024-05-13,gpt-4o-2024-05-13,Proprietary,OpenAI,10-2023,https://openai.com/api/,70.0,50.0
|
3 |
+
1082.4712318089776,67.30706532383378,1098.8214377737013,1066.2443068192063,1368,2,Claude 3.5 Sonnet,Claude 3.5 Sonnet,Proprietary,Anthropic,06-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-5-family,-,-
|
4 |
+
1067.7972710707584,77.54813124058781,1085.7154378048815,1050.9962401597693,1079,2,gpt-4-turbo-2024-04-09,gpt-4-turbo-2024-04-09,Proprietary,OpenAI,04-2023,https://openai.com/api/,70.0,50.0
|
5 |
+
896.0996954823132,48.146002288922574,908.9102856490172,881.0794870488638,1755,24,Gemma 2 27B,Gemma 2 27B,Proprietary,Google,-,https://blog.google/technology/developers/google-gemma-2/,-,-
|
6 |
+
910.4638032291534,65.03295018106758,925.6533766372329,894.8531519962243,1412,23,gpt-4o-mini-2024-07-18,gpt-4o-mini-2024-07-18,Proprietary,OpenAI,07-2024,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/,70.0,50.0
|
7 |
+
882.2024127696782,46.727465400148965,895.6814483653017,868.8792324665123,1762,25,Llama 3.1 405B Instruct Turbo,Llama 3.1 405B Instruct Turbo,Proprietary,Meta,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
|
8 |
+
927.3426646177923,43.14190671241539,938.9460617268497,913.8808130950015,1840,23,T-lite-instruct-0.1,T-lite-instruct-0.1,Open Source,t-bank-ai,In training,https://huggingface.co/AnatoliiPotapov/T-lite-instruct-0.1,-,-
|
9 |
+
921.6751659478813,54.47284854891922,935.6184297233434,906.4181069570828,1451,23,YandexGPT Experimental,YandexGPT Experimental,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,45.2,35.2
|
10 |
+
1034.2403623834216,71.5203151616652,1051.3802550185828,1017.8667965213967,1302,4,saiga_llama3_70b,saiga_llama3_70b,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_70b_sft_m1_d5_abliterated_awq_4bit,-,-
|
11 |
+
1001.9675392748777,58.16834454394786,1016.1137185526782,985.9628683255005,1321,12,LLaMA-3 Chat (70B),LLaMA-3 Chat (70B),Proprietary,Meta,12-2023,https://llama.meta.com/llama3/,-,-
|
12 |
+
1061.6973041034578,74.44927336719411,1078.8048207556594,1044.1728155446337,1130,2,Claude 3 Haiku,Claude 3 Haiku,Proprietary,Anthropic,03-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-family,-,-
|
13 |
+
1031.9462189087722,69.98556383299538,1048.6037483711063,1016.0340615760772,1153,5,Llama 3.1 70B Instruct Turbo,Llama 3.1 70B Instruct Turbo,Proprietary,Meta,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
|
14 |
+
955.0563496085472,68.44542811973741,972.0221098403483,939.4803252154242,1159,16,saiga_llama3_8b_v7,saiga_llama3_8b_v7,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_8b,-,-
|
15 |
+
981.471697758703,64.30691200155506,997.4484997816128,965.4026452905196,1308,14,LLaMA-3 Chat (8B),LLaMA-3 Chat (8B),Proprietary,Meta,03-2023,https://llama.meta.com/llama3/,-,-
|
16 |
+
1057.705029563665,63.49803703318486,1071.780406958427,1041.7816283423076,1268,2,gpt-4-0613,gpt-4-0613,Proprietary,OpenAI,04-2023,https://openai.com/api/,70.0,50.0
|
17 |
+
987.184154619155,85.96918634765167,1004.8623756385316,969.1089500387948,1006,13,Vikhrmodels/it-5.2-fp16-cp,Vikhrmodels/it-5.2-fp16-cp,Open Source,Vikhrmodels,In training,https://huggingface.co/Vikhrmodels/it-5.2-fp16-cp,-,-
|
18 |
+
958.7563966922742,48.555428719736426,972.4785448198948,946.1533164856768,1709,16,saiga_llama3_8b_v6,saiga_llama3_8b_v6,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_8b,-,-
|
19 |
+
974.2212216312174,45.179095658747514,987.9471753763779,961.1689174212224,1695,14,saiga_phi3_medium,saiga_phi3_medium,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_phi3_medium_sft_m1_d2_kto_m5_d7,-,-
|
20 |
+
1051.2085428876214,59.1803804872065,1065.6734189843075,1036.0902842279463,1337,4,Qwen 2 Instruct (72B),Qwen 2 Instruct (72B),Open Source,Qwen,12-2023,https://llama.meta.com/llama3/,-,-
|
21 |
+
907.1586684775945,46.24986937585701,920.2493043538082,894.2276742569329,1806,23,YandexGPT 3 Pro,YandexGPT 3 Pro,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,65.2,45.2
|
22 |
+
992.0780047580829,48.43885512799292,1005.7889645006121,978.1166944158903,1849,13,YandexGPT 3 Lite,YandexGPT 3 Lite,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,45.2,35.2
|
23 |
+
1080.2118761065628,47.88274587371624,1093.9534401468022,1066.5039195873696,1863,2,Llama 3.1 8B Instruct Turbo,Llama 3.1 8B Instruct Turbo,Proprietary,Meta,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
|
24 |
+
1115.2560110717836,49.43911399309643,1128.8268501868852,1101.5169872321792,1953,1,GigaChat-Pro 2.2.25.3,GigaChat-Pro 2.2.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
25 |
+
1061.7490064178544,60.0958115780098,1077.4817813316438,1046.4898751639232,1415,2,GigaChat-Pro 4.0.26.8,GigaChat-Pro 4.0.26.8,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
26 |
+
1037.5279712026681,58.09261960189402,1053.5943080787135,1023.3997787149007,1427,4,GigaChat 4.0.26.8,GigaChat 4.0.26.8,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
27 |
+
986.328273904818,95.01290787255834,1005.380308207033,967.1441094053149,856,13,gpt-3.5-turbo-0125,gpt-3.5-turbo-0125,Proprietary,OpenAI,09-2021,https://openai.com/api/,65.2,45.2
|
28 |
+
1019.7804129369036,70.96698604809858,1036.5171599962364,1003.4378801544037,1221,8,GigaChat 3.1.25.3,GigaChat 3.1.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
29 |
+
983.8926083552761,61.397216578584825,1000.7090172077129,969.8633235700088,1249,14,GigaChat-Plus 3.1.25.3,GigaChat-Plus 3.1.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|