|
task,metric,value,err,version
|
|
anli_r1,acc,0.315,0.014696631960792508,0
|
|
anli_r2,acc,0.341,0.014998131348402699,0
|
|
anli_r3,acc,0.3275,0.013553211167251951,0
|
|
arc_challenge,acc,0.2235494880546075,0.012174896631202607,0
|
|
arc_challenge,acc_norm,0.257679180887372,0.012780770562768412,0
|
|
arc_easy,acc,0.5488215488215489,0.010210757101073475,0
|
|
arc_easy,acc_norm,0.5298821548821548,0.010241444322886432,0
|
|
boolq,acc,0.5474006116207951,0.008705669190431184,1
|
|
cb,acc,0.5714285714285714,0.06672848092813058,1
|
|
cb,f1,0.5046621652657205,,1
|
|
copa,acc,0.75,0.04351941398892446,0
|
|
hellaswag,acc,0.3670583549093806,0.004810175357871104,0
|
|
hellaswag,acc_norm,0.44682334196375223,0.004961481380023777,0
|
|
piqa,acc,0.7002176278563657,0.01068968696713809,0
|
|
piqa,acc_norm,0.6969532100108814,0.010722648689531501,0
|
|
rte,acc,0.49097472924187724,0.030091559826331334,0
|
|
sciq,acc,0.875,0.010463483381956722,0
|
|
sciq,acc_norm,0.875,0.010463483381956722,0
|
|
storycloze_2016,acc,0.6445750935328701,0.01106852845239988,0
|
|
winogrande,acc,0.5335438042620363,0.014020826677598101,0
|
|
|