|
task,metric,value,err,version
|
|
anli_r1,acc,0.335,0.014933117490932577,0
|
|
anli_r2,acc,0.337,0.01495508791865361,0
|
|
anli_r3,acc,0.35083333333333333,0.013782212417178199,0
|
|
arc_challenge,acc,0.27474402730375425,0.013044617212771227,0
|
|
arc_challenge,acc_norm,0.3165529010238908,0.01359243151906808,0
|
|
arc_easy,acc,0.5951178451178452,0.010072423960395701,0
|
|
arc_easy,acc_norm,0.5585016835016835,0.010189314382749934,0
|
|
boolq,acc,0.6134556574923548,0.008516943934341978,1
|
|
cb,acc,0.4107142857142857,0.06633634150359541,1
|
|
cb,f1,0.28810120539443845,,1
|
|
copa,acc,0.8,0.040201512610368445,0
|
|
hellaswag,acc,0.4658434574785899,0.004978124945759844,0
|
|
hellaswag,acc_norm,0.6102370045807608,0.0048669971103881965,0
|
|
piqa,acc,0.7459194776931447,0.010157271999135041,0
|
|
piqa,acc_norm,0.7595212187159956,0.009971345364651068,0
|
|
rte,acc,0.5776173285198556,0.029731622646495887,0
|
|
sciq,acc,0.855,0.011139977517890134,0
|
|
sciq,acc_norm,0.794,0.012795613612786548,0
|
|
storycloze_2016,acc,0.729021913415286,0.010278188399635051,0
|
|
winogrande,acc,0.5935280189423836,0.013804448697753376,0
|
|
|