|
task,metric,value,err,version
|
|
anli_r1,acc,0.293,0.014399942998441271,0
|
|
anli_r2,acc,0.327,0.01484221315341124,0
|
|
anli_r3,acc,0.3383333333333333,0.013664144006618275,0
|
|
arc_challenge,acc,0.33447098976109213,0.013787460322441384,0
|
|
arc_challenge,acc_norm,0.3438566552901024,0.01388064457015621,0
|
|
arc_easy,acc,0.6759259259259259,0.009603728850095394,0
|
|
arc_easy,acc_norm,0.640993265993266,0.009843424713072176,0
|
|
boolq,acc,0.6669724770642201,0.00824302391268888,1
|
|
cb,acc,0.32142857142857145,0.06297362289056341,1
|
|
cb,f1,0.2706949089557785,,1
|
|
copa,acc,0.77,0.042295258468165065,0
|
|
hellaswag,acc,0.5265883290181239,0.0049827214724073405,0
|
|
hellaswag,acc_norm,0.7029476199960167,0.00456025908319738,0
|
|
piqa,acc,0.7763873775843307,0.009721489519176294,0
|
|
piqa,acc_norm,0.7883569096844396,0.009530351270479392,0
|
|
rte,acc,0.5595667870036101,0.029882123363118723,0
|
|
sciq,acc,0.928,0.008178195576218681,0
|
|
sciq,acc_norm,0.911,0.009008893392651523,0
|
|
storycloze_2016,acc,0.7413148049171566,0.010126662138021714,0
|
|
winogrande,acc,0.6243093922651933,0.013611257508380444,0
|
|
|