|
task,metric,value,err,version
|
|
anli_r1,acc,0.335,0.014933117490932575,0
|
|
anli_r2,acc,0.328,0.014853842487270334,0
|
|
anli_r3,acc,0.3433333333333333,0.01371263383046586,0
|
|
arc_challenge,acc,0.3378839590443686,0.013822047922283516,0
|
|
arc_challenge,acc_norm,0.3643344709897611,0.014063260279882412,0
|
|
arc_easy,acc,0.6957070707070707,0.009441202922359183,0
|
|
arc_easy,acc_norm,0.6717171717171717,0.00963574950926216,0
|
|
boolq,acc,0.6440366972477064,0.008374337517726581,1
|
|
cb,acc,0.14285714285714285,0.047184161362558305,1
|
|
cb,f1,0.13156966490299823,,1
|
|
copa,acc,0.84,0.03684529491774709,0
|
|
hellaswag,acc,0.5320653256323441,0.00497951000177662,0
|
|
hellaswag,acc_norm,0.7050388368850826,0.004550933142528758,0
|
|
piqa,acc,0.7736670293797606,0.009763294246879427,0
|
|
piqa,acc_norm,0.7845484221980413,0.009592463115658107,0
|
|
rte,acc,0.49097472924187724,0.030091559826331334,0
|
|
sciq,acc,0.931,0.008018934050315155,0
|
|
sciq,acc_norm,0.922,0.008484573530118587,0
|
|
storycloze_2016,acc,0.7536076964190273,0.009964727533753546,0
|
|
winogrande,acc,0.6148382004735596,0.013676821287521413,0
|
|
|