|
task,metric,value,err,version
|
|
anli_r1,acc,0.332,0.01489959724281149,0
|
|
anli_r2,acc,0.362,0.015204840912919503,0
|
|
anli_r3,acc,0.33416666666666667,0.013622434813136788,0
|
|
arc_challenge,acc,0.28071672354948807,0.013131238126975578,0
|
|
arc_challenge,acc_norm,0.3046075085324232,0.013449522109932489,0
|
|
arc_easy,acc,0.6014309764309764,0.010046455400477943,0
|
|
arc_easy,acc_norm,0.585016835016835,0.01011038315196114,0
|
|
boolq,acc,0.5688073394495413,0.008661853128165595,1
|
|
cb,acc,0.4642857142857143,0.06724777654937658,1
|
|
cb,f1,0.4217687074829932,,1
|
|
copa,acc,0.71,0.045604802157206845,0
|
|
hellaswag,acc,0.40420235012945627,0.004897340793314381,0
|
|
hellaswag,acc_norm,0.5269866560446126,0.004982508198584267,0
|
|
piqa,acc,0.7274211099020674,0.010389256803296023,0
|
|
piqa,acc_norm,0.7290533188248096,0.010369718937426844,0
|
|
rte,acc,0.5776173285198556,0.02973162264649588,0
|
|
sciq,acc,0.918,0.008680515615523727,0
|
|
sciq,acc_norm,0.908,0.009144376393151098,0
|
|
storycloze_2016,acc,0.6675574559059326,0.01089386077834354,0
|
|
winogrande,acc,0.5351223362273086,0.014017773120881585,0
|
|
|