|
task,metric,value,err,version
|
|
anli_r1,acc,0.318,0.014734079309311901,0
|
|
anli_r2,acc,0.347,0.015060472031706618,0
|
|
anli_r3,acc,0.3275,0.013553211167251953,0
|
|
arc_challenge,acc,0.30716723549488056,0.013481034054980945,0
|
|
arc_challenge,acc_norm,0.33276450511945393,0.013769863046192314,0
|
|
arc_easy,acc,0.6254208754208754,0.00993175882041061,0
|
|
arc_easy,acc_norm,0.5993265993265994,0.010055304474255585,0
|
|
boolq,acc,0.636697247706422,0.008411885836787163,1
|
|
cb,acc,0.35714285714285715,0.0646095738380922,1
|
|
cb,f1,0.26868521549372615,,1
|
|
copa,acc,0.79,0.040936018074033256,0
|
|
hellaswag,acc,0.47470623381796456,0.004983392650570956,0
|
|
hellaswag,acc_norm,0.6330412268472416,0.004809901151234834,0
|
|
piqa,acc,0.749727965179543,0.010106561880089786,0
|
|
piqa,acc_norm,0.76550598476605,0.00988520314324054,0
|
|
rte,acc,0.51985559566787,0.030072723167317184,0
|
|
sciq,acc,0.904,0.009320454434783222,0
|
|
sciq,acc_norm,0.893,0.009779910359847165,0
|
|
storycloze_2016,acc,0.7188669160876536,0.010395836091628113,0
|
|
winogrande,acc,0.5816890292028414,0.013863669961195918,0
|
|
|