|
task,metric,value,err,version
|
|
anli_r1,acc,0.331,0.014888272588203945,0
|
|
anli_r2,acc,0.35,0.015090650341444231,0
|
|
anli_r3,acc,0.33416666666666667,0.013622434813136769,0
|
|
arc_challenge,acc,0.28924914675767915,0.013250012579393443,0
|
|
arc_challenge,acc_norm,0.30887372013651876,0.013501770929344003,0
|
|
arc_easy,acc,0.6102693602693603,0.01000716939179705,0
|
|
arc_easy,acc_norm,0.5993265993265994,0.010055304474255582,0
|
|
boolq,acc,0.5519877675840978,0.008697655510897228,1
|
|
cb,acc,0.375,0.06527912098338669,1
|
|
cb,f1,0.26182156999767064,,1
|
|
copa,acc,0.75,0.04351941398892446,0
|
|
hellaswag,acc,0.468034256124278,0.0049795737655758555,0
|
|
hellaswag,acc_norm,0.6188010356502689,0.00484688692976345,0
|
|
piqa,acc,0.7529923830250272,0.010062268140772622,0
|
|
piqa,acc_norm,0.7584330794341676,0.00998671800180446,0
|
|
rte,acc,0.5342960288808665,0.03002557981936643,0
|
|
sciq,acc,0.883,0.010169287802713329,0
|
|
sciq,acc_norm,0.865,0.010811655372416053,0
|
|
storycloze_2016,acc,0.7151256012827365,0.01043751398661172,0
|
|
winogrande,acc,0.5769534333070244,0.013885055359056472,0
|
|
|