|
task,metric,value,err,version
|
|
anli_r1,acc,0.335,0.014933117490932575,0
|
|
anli_r2,acc,0.335,0.014933117490932573,0
|
|
anli_r3,acc,0.3325,0.01360541734571053,0
|
|
arc_challenge,acc,0.22525597269624573,0.0122078399954073,0
|
|
arc_challenge,acc_norm,0.24658703071672355,0.012595726268790134,0
|
|
arc_easy,acc,0.5210437710437711,0.010250692602022559,0
|
|
arc_easy,acc_norm,0.4692760942760943,0.01024039558481524,0
|
|
boolq,acc,0.5966360856269113,0.00858016855488973,1
|
|
cb,acc,0.4107142857142857,0.0663363415035954,1
|
|
cb,f1,0.1940928270042194,,1
|
|
copa,acc,0.7,0.046056618647183814,0
|
|
hellaswag,acc,0.3520215096594304,0.004766245539606642,0
|
|
hellaswag,acc_norm,0.4331806413065126,0.0049450236570322765,0
|
|
piqa,acc,0.6942328618063112,0.010749627366141629,0
|
|
piqa,acc_norm,0.6942328618063112,0.010749627366141639,0
|
|
rte,acc,0.5126353790613718,0.030086851767188564,0
|
|
sciq,acc,0.825,0.012021627157731975,0
|
|
sciq,acc_norm,0.71,0.014356395999905687,0
|
|
storycloze_2016,acc,0.6424371993586317,0.011083341168827782,0
|
|
winogrande,acc,0.5272296764009471,0.014031631629827696,0
|
|
|