|
task,metric,value,err,version
|
|
anli_r1,acc,0.336,0.014944140233795027,0
|
|
anli_r2,acc,0.34,0.014987482264363937,0
|
|
anli_r3,acc,0.3516666666666667,0.013789711695404801,0
|
|
arc_challenge,acc,0.26109215017064846,0.012835523909473847,0
|
|
arc_challenge,acc_norm,0.29948805460750855,0.013385021637313572,0
|
|
arc_easy,acc,0.6064814814814815,0.010024426884292557,0
|
|
arc_easy,acc_norm,0.5917508417508418,0.010085566195791252,0
|
|
boolq,acc,0.5605504587155963,0.008680693125810188,1
|
|
cb,acc,0.39285714285714285,0.0658538889806635,1
|
|
cb,f1,0.33413848631239934,,1
|
|
copa,acc,0.71,0.045604802157206845,0
|
|
hellaswag,acc,0.4049990041824338,0.004898886080687925,0
|
|
hellaswag,acc_norm,0.5279824736108345,0.004981961097590808,0
|
|
piqa,acc,0.7165397170837867,0.010515057791152076,0
|
|
piqa,acc_norm,0.7236126224156693,0.01043416238827561,0
|
|
rte,acc,0.49097472924187724,0.030091559826331334,0
|
|
sciq,acc,0.915,0.00882342636694232,0
|
|
sciq,acc_norm,0.911,0.009008893392651525,0
|
|
storycloze_2016,acc,0.6734366648850882,0.010844543793668893,0
|
|
winogrande,acc,0.5422257300710339,0.014002284504422438,0
|
|
|