|
task,metric,value,err,version
|
|
anli_r1,acc,0.327,0.01484221315341124,0
|
|
anli_r2,acc,0.332,0.014899597242811492,0
|
|
anli_r3,acc,0.34,0.01368049572576779,0
|
|
arc_challenge,acc,0.28071672354948807,0.013131238126975586,0
|
|
arc_challenge,acc_norm,0.31313993174061433,0.013552671543623504,0
|
|
arc_easy,acc,0.6031144781144782,0.010039236800583209,0
|
|
arc_easy,acc_norm,0.5723905723905723,0.010151683397430673,0
|
|
boolq,acc,0.5788990825688073,0.008635491562221344,1
|
|
cb,acc,0.5,0.06741998624632421,1
|
|
cb,f1,0.35057471264367807,,1
|
|
copa,acc,0.8,0.040201512610368445,0
|
|
hellaswag,acc,0.4731129257120096,0.0049825618152141244,0
|
|
hellaswag,acc_norm,0.6270663214499104,0.004825963768772216,0
|
|
piqa,acc,0.7589771490750816,0.009979042717267314,0
|
|
piqa,acc_norm,0.7616974972796517,0.009940334245876219,0
|
|
rte,acc,0.5306859205776173,0.03003973059219781,0
|
|
sciq,acc,0.892,0.009820001651345696,0
|
|
sciq,acc_norm,0.89,0.009899393819724446,0
|
|
storycloze_2016,acc,0.7140566541956174,0.010449259851345842,0
|
|
winogrande,acc,0.574585635359116,0.013895257666646378,0
|
|
|