|
task,metric,value,err,version
|
|
anli_r1,acc,0.323,0.014794927843348633,0
|
|
anli_r2,acc,0.317,0.014721675438880236,0
|
|
anli_r3,acc,0.3625,0.013883037874225516,0
|
|
arc_challenge,acc,0.2790102389078498,0.013106784883601333,0
|
|
arc_challenge,acc_norm,0.30802047781569963,0.013491429517292038,0
|
|
arc_easy,acc,0.5942760942760943,0.010075755540128873,0
|
|
arc_easy,acc_norm,0.5757575757575758,0.010141333654958552,0
|
|
boolq,acc,0.5755351681957187,0.008644688121685498,1
|
|
cb,acc,0.35714285714285715,0.06460957383809221,1
|
|
cb,f1,0.19573820395738203,,1
|
|
copa,acc,0.79,0.040936018074033256,0
|
|
hellaswag,acc,0.4592710615415256,0.004973199296339971,0
|
|
hellaswag,acc_norm,0.6106353316072496,0.00486609688094144,0
|
|
piqa,acc,0.7540805223068553,0.010047331865625194,0
|
|
piqa,acc_norm,0.7589771490750816,0.009979042717267314,0
|
|
rte,acc,0.5126353790613718,0.030086851767188564,0
|
|
sciq,acc,0.835,0.01174363286691616,0
|
|
sciq,acc_norm,0.788,0.01293148186493805,0
|
|
storycloze_2016,acc,0.7194013896312133,0.01038980964728882,0
|
|
winogrande,acc,0.585635359116022,0.013844846232268565,0
|
|
|