|
task,metric,value,err,version
|
|
anli_r1,acc,0.335,0.014933117490932577,0
|
|
anli_r2,acc,0.337,0.014955087918653609,0
|
|
anli_r3,acc,0.3325,0.013605417345710528,0
|
|
arc_challenge,acc,0.29948805460750855,0.013385021637313563,0
|
|
arc_challenge,acc_norm,0.3361774744027304,0.013804855026205763,0
|
|
arc_easy,acc,0.6380471380471381,0.00986099146668847,0
|
|
arc_easy,acc_norm,0.6216329966329966,0.00995157568333195,0
|
|
boolq,acc,0.6192660550458715,0.008492625561656215,1
|
|
cb,acc,0.23214285714285715,0.056929390240001085,1
|
|
cb,f1,0.23148148148148148,,1
|
|
copa,acc,0.84,0.0368452949177471,0
|
|
hellaswag,acc,0.47470623381796456,0.004983392650570959,0
|
|
hellaswag,acc_norm,0.6319458275243975,0.004812905279066442,0
|
|
piqa,acc,0.7529923830250272,0.010062268140772625,0
|
|
piqa,acc_norm,0.7568008705114254,0.010009611953858917,0
|
|
rte,acc,0.47653429602888087,0.030063300411902652,0
|
|
sciq,acc,0.915,0.008823426366942323,0
|
|
sciq,acc_norm,0.91,0.009054390204866444,0
|
|
storycloze_2016,acc,0.721004810261892,0.010371620932652795,0
|
|
winogrande,acc,0.6108918705603789,0.013702520871485949,0
|
|
|