|
task,metric,value,err,version
|
|
anli_r1,acc,0.304,0.014553205687950434,0
|
|
anli_r2,acc,0.332,0.014899597242811482,0
|
|
anli_r3,acc,0.34833333333333333,0.013759437498874061,0
|
|
arc_challenge,acc,0.2508532423208191,0.01266819862131543,0
|
|
arc_challenge,acc_norm,0.2764505119453925,0.013069662474252425,0
|
|
arc_easy,acc,0.5096801346801347,0.010257860554461122,0
|
|
arc_easy,acc_norm,0.46296296296296297,0.010231597249131062,0
|
|
boolq,acc,0.6155963302752293,0.008508133844703919,1
|
|
cb,acc,0.42857142857142855,0.06672848092813058,1
|
|
cb,f1,0.30465949820788535,,1
|
|
copa,acc,0.77,0.042295258468165065,0
|
|
hellaswag,acc,0.45429197371041624,0.004968888130290068,0
|
|
hellaswag,acc_norm,0.5927106154152559,0.004903254264177628,0
|
|
piqa,acc,0.6953210010881393,0.010738889044325161,0
|
|
piqa,acc_norm,0.6953210010881393,0.010738889044325161,0
|
|
rte,acc,0.5595667870036101,0.02988212336311872,0
|
|
sciq,acc,0.827,0.011967214137559941,0
|
|
sciq,acc_norm,0.789,0.01290913032104209,0
|
|
storycloze_2016,acc,0.6734366648850882,0.010844543793668893,0
|
|
winogrande,acc,0.5603788476716653,0.013949649776015696,0
|
|
|