|
task,metric,value,err,version
|
|
anli_r1,acc,0.309,0.014619600977206493,0
|
|
anli_r2,acc,0.325,0.014818724459095524,0
|
|
anli_r3,acc,0.32916666666666666,0.013570806258433625,0
|
|
arc_challenge,acc,0.27986348122866894,0.01311904089772592,0
|
|
arc_challenge,acc_norm,0.3191126279863481,0.013621696119173304,0
|
|
arc_easy,acc,0.6292087542087542,0.009911292822056923,0
|
|
arc_easy,acc_norm,0.617003367003367,0.009974920384536482,0
|
|
boolq,acc,0.6305810397553517,0.008441557531799614,1
|
|
cb,acc,0.48214285714285715,0.0673769750864465,1
|
|
cb,f1,0.3338164251207729,,1
|
|
copa,acc,0.79,0.040936018074033256,0
|
|
hellaswag,acc,0.4766978689504083,0.004984359669951929,0
|
|
hellaswag,acc_norm,0.6308504282015535,0.004815882719278398,0
|
|
piqa,acc,0.7611534276387377,0.009948120385337494,0
|
|
piqa,acc_norm,0.7665941240478781,0.009869247889520993,0
|
|
rte,acc,0.5054151624548736,0.030094698123239966,0
|
|
sciq,acc,0.902,0.009406619184621252,0
|
|
sciq,acc_norm,0.885,0.01009340759490462,0
|
|
storycloze_2016,acc,0.7252805986103688,0.010322309878339504,0
|
|
winogrande,acc,0.5832675611681136,0.013856250072796318,0
|
|
|