|
task,metric,value,err,version
|
|
anli_r1,acc,0.308,0.01460648312734276,0
|
|
anli_r2,acc,0.338,0.014965960710224482,0
|
|
anli_r3,acc,0.3358333333333333,0.013639261190932884,0
|
|
arc_challenge,acc,0.22610921501706485,0.012224202097063276,0
|
|
arc_challenge,acc_norm,0.26109215017064846,0.012835523909473855,0
|
|
arc_easy,acc,0.5361952861952862,0.01023286555034673,0
|
|
arc_easy,acc_norm,0.5126262626262627,0.010256511718330589,0
|
|
boolq,acc,0.5318042813455658,0.008727345583419184,1
|
|
cb,acc,0.44642857142857145,0.067031892279424,1
|
|
cb,f1,0.3116701607267645,,1
|
|
copa,acc,0.69,0.04648231987117316,0
|
|
hellaswag,acc,0.3512248556064529,0.004763774981834667,0
|
|
hellaswag,acc_norm,0.4387572196773551,0.00495220983185659,0
|
|
piqa,acc,0.6953210010881393,0.010738889044325161,0
|
|
piqa,acc_norm,0.6942328618063112,0.010749627366141639,0
|
|
rte,acc,0.4981949458483754,0.030096267148976626,0
|
|
sciq,acc,0.878,0.010354864712936703,0
|
|
sciq,acc_norm,0.867,0.010743669132397346,0
|
|
storycloze_2016,acc,0.6317477284874399,0.011153823258531738,0
|
|
winogrande,acc,0.5122336227308603,0.014048278820405616,0
|
|
|