|
task,metric,value,err,version
|
|
anli_r1,acc,0.327,0.014842213153411242,0
|
|
anli_r2,acc,0.334,0.014922019523732968,0
|
|
anli_r3,acc,0.3358333333333333,0.013639261190932889,0
|
|
arc_challenge,acc,0.24232081911262798,0.012521593295800115,0
|
|
arc_challenge,acc_norm,0.2781569965870307,0.013094469919538793,0
|
|
arc_easy,acc,0.494949494949495,0.010259260102565861,0
|
|
arc_easy,acc_norm,0.44612794612794615,0.01020005782876501,0
|
|
boolq,acc,0.45321100917431195,0.00870668126587249,1
|
|
cb,acc,0.39285714285714285,0.0658538889806635,1
|
|
cb,f1,0.36502627806975624,,1
|
|
copa,acc,0.66,0.04760952285695238,0
|
|
hellaswag,acc,0.3937462656841267,0.004875812021461996,0
|
|
hellaswag,acc_norm,0.48994224258115915,0.004988771791854512,0
|
|
piqa,acc,0.6969532100108814,0.010722648689531525,0
|
|
piqa,acc_norm,0.7018498367791077,0.01067296411400829,0
|
|
rte,acc,0.5595667870036101,0.02988212336311872,0
|
|
sciq,acc,0.704,0.01444273494157502,0
|
|
sciq,acc_norm,0.637,0.015213890444671283,0
|
|
storycloze_2016,acc,0.6702298236237306,0.01087168247139514,0
|
|
winogrande,acc,0.5303867403314917,0.014026510839428743,0
|
|
|