CZ-EVAL / leaderboard /table.csv
Hynek Kydlicek
new models
acc5a5f
model_name,analytical,critical,culture,verbal,klokan
anthropic/claude-2.1,0.3804034582132565,0.6449912126537786,0.7981770833333334,0.6336336336336337,0.3823884197828709
anthropic/claude-3-haiku,0.3323727185398655,0.6045694200351494,0.81640625,0.6246246246246246,0.32226322263222634
anthropic/claude-3-opus,0.47262247838616717,0.7644991212653779,0.9244791666666666,0.8018018018018018,0.5781057810578106
anthropic/claude-3-sonnet,0.37848222862632086,0.6889279437609842,0.8346354166666666,0.6126126126126126,0.44280442804428044
cohere/command-r,0.27857829010566765,0.5342706502636204,0.7044270833333334,0.4444444444444444,0.24846248462484624
google/gemini-pro,0.28914505283381364,0.6098418277680141,0.8072916666666666,0.6096096096096096,0.2865928659286593
google/gemma-7b-it,0.2219020172910663,0.27943760984182775,0.22916666666666666,0.22822822822822822,0.16974169741697417
mistralai/mistral-large,0.3852065321805956,0.6678383128295254,0.859375,0.6276276276276276,0.4108241082410824
mistralai/mistral-medium,0.3121998078770413,0.5957820738137083,0.7734375,0.5045045045045045,0.2939729397293973
mistralai/mixtral-8x7b-instruct,0.2526416906820365,0.5114235500878734,0.7122395833333334,0.43543543543543545,0.26691266912669126
openai/gpt-3.5-turbo,0.3045148895292987,0.4991212653778559,0.7213541666666666,0.44744744744744747,0.3247232472324723
openai/gpt-4-1106-preview,0.515850144092219,0.7065026362038664,0.90234375,0.7267267267267268,0.5805658056580566