|
task,metric,value,err,version
|
|
anli_r1,acc,0.324,0.014806864733738856,0
|
|
anli_r2,acc,0.333,0.014910846164229873,0
|
|
anli_r3,acc,0.33666666666666667,0.013647602942406401,0
|
|
arc_challenge,acc,0.29948805460750855,0.013385021637313565,0
|
|
arc_challenge,acc_norm,0.3387372013651877,0.01383056892797433,0
|
|
arc_easy,acc,0.6439393939393939,0.00982545460841631,0
|
|
arc_easy,acc_norm,0.640993265993266,0.009843424713072174,0
|
|
boolq,acc,0.5883792048929664,0.008607357686607963,1
|
|
cb,acc,0.35714285714285715,0.0646095738380922,1
|
|
cb,f1,0.2275946275946276,,1
|
|
copa,acc,0.8,0.040201512610368445,0
|
|
hellaswag,acc,0.4509061939852619,0.004965670398127354,0
|
|
hellaswag,acc_norm,0.5998805018920533,0.004889210628907973,0
|
|
piqa,acc,0.750816104461371,0.010091882770120216,0
|
|
piqa,acc_norm,0.750272034820457,0.010099232969867472,0
|
|
rte,acc,0.4584837545126354,0.029992535385373314,0
|
|
sciq,acc,0.922,0.008484573530118583,0
|
|
sciq,acc_norm,0.93,0.008072494358323499,0
|
|
storycloze_2016,acc,0.7081774452164618,0.010512588616199622,0
|
|
winogrande,acc,0.5824782951854776,0.013859978264440248,0
|
|
|