|
task,metric,value,err,version
|
|
anli_r1,acc,0.295,0.014428554438445512,0
|
|
anli_r2,acc,0.312,0.014658474370509007,0
|
|
anli_r3,acc,0.33416666666666667,0.013622434813136769,0
|
|
arc_challenge,acc,0.3250853242320819,0.013688147309729122,0
|
|
arc_challenge,acc_norm,0.34812286689419797,0.013921008595179333,0
|
|
arc_easy,acc,0.6805555555555556,0.009567482017268095,0
|
|
arc_easy,acc_norm,0.6565656565656566,0.00974381736896003,0
|
|
boolq,acc,0.6626911314984709,0.008269171495741622,1
|
|
cb,acc,0.21428571428571427,0.055328333517248834,1
|
|
cb,f1,0.1865942028985507,,1
|
|
copa,acc,0.83,0.03775251680686371,0
|
|
hellaswag,acc,0.5261900019916351,0.0049829315659459545,0
|
|
hellaswag,acc_norm,0.702549292969528,0.004562022467161891,0
|
|
piqa,acc,0.7709466811751904,0.009804509865175504,0
|
|
piqa,acc_norm,0.7856365614798694,0.009574842136050964,0
|
|
rte,acc,0.5342960288808665,0.030025579819366426,0
|
|
sciq,acc,0.933,0.007910345983177549,0
|
|
sciq,acc_norm,0.92,0.008583336977753655,0
|
|
storycloze_2016,acc,0.7536076964190273,0.009964727533753548,0
|
|
winogrande,acc,0.6250986582478295,0.013605544523788,0
|
|
|