|
task,metric,value,err,version
|
|
anli_r1,acc,0.312,0.014658474370509005,0
|
|
anli_r2,acc,0.328,0.014853842487270336,0
|
|
anli_r3,acc,0.32666666666666666,0.013544340907003665,0
|
|
arc_challenge,acc,0.2832764505119454,0.013167478735134575,0
|
|
arc_challenge,acc_norm,0.29436860068259385,0.013318528460539422,0
|
|
arc_easy,acc,0.6094276094276094,0.01001105911206424,0
|
|
arc_easy,acc_norm,0.5631313131313131,0.010177672928157695,0
|
|
boolq,acc,0.5324159021406728,0.008726657178723137,1
|
|
cb,acc,0.5,0.06741998624632421,1
|
|
cb,f1,0.4627446995868048,,1
|
|
copa,acc,0.71,0.04560480215720684,0
|
|
hellaswag,acc,0.3833897629954192,0.0048521826212742526,0
|
|
hellaswag,acc_norm,0.47769368651663013,0.00498481339101621,0
|
|
piqa,acc,0.750816104461371,0.010091882770120216,0
|
|
piqa,acc_norm,0.7584330794341676,0.009986718001804439,0
|
|
rte,acc,0.4657039711191336,0.030025579819366426,0
|
|
sciq,acc,0.84,0.011598902298689004,0
|
|
sciq,acc_norm,0.795,0.012772554096113118,0
|
|
storycloze_2016,acc,0.6456440406199893,0.011061031791615487,0
|
|
winogrande,acc,0.5706393054459353,0.01391153749996917,0
|
|
|