|
task,metric,value,err,version
|
|
anli_r1,acc,0.32,0.014758652303574886,0
|
|
anli_r2,acc,0.346,0.015050266127564443,0
|
|
anli_r3,acc,0.35,0.013774667009018554,0
|
|
arc_challenge,acc,0.2858361774744027,0.01320319608853737,0
|
|
arc_challenge,acc_norm,0.3122866894197952,0.013542598541688065,0
|
|
arc_easy,acc,0.5976430976430976,0.010062244711011518,0
|
|
arc_easy,acc_norm,0.5913299663299664,0.010087174498762886,0
|
|
boolq,acc,0.6238532110091743,0.008472516562330725,1
|
|
cb,acc,0.39285714285714285,0.0658538889806635,1
|
|
cb,f1,0.3130977130977131,,1
|
|
copa,acc,0.77,0.04229525846816506,0
|
|
hellaswag,acc,0.4643497311292571,0.004977081808179426,0
|
|
hellaswag,acc_norm,0.603963353913563,0.004880726787988643,0
|
|
piqa,acc,0.749183895538629,0.010113869547069044,0
|
|
piqa,acc_norm,0.7453754080522307,0.010164432237060499,0
|
|
rte,acc,0.5379061371841155,0.030009848912529117,0
|
|
sciq,acc,0.866,0.01077776229836968,0
|
|
sciq,acc_norm,0.859,0.011010914595992436,0
|
|
storycloze_2016,acc,0.706574024585783,0.010529489334744466,0
|
|
winogrande,acc,0.5659037095501184,0.013929882555694058,0
|
|
|