|
task,metric,value,err,version
|
|
anli_r1,acc,0.34,0.014987482264363937,0
|
|
anli_r2,acc,0.375,0.015316971293620996,0
|
|
anli_r3,acc,0.3358333333333333,0.013639261190932882,0
|
|
arc_challenge,acc,0.23293515358361774,0.012352507042617394,0
|
|
arc_challenge,acc_norm,0.26023890784982934,0.012821930225112556,0
|
|
arc_easy,acc,0.4718013468013468,0.010243454104071792,0
|
|
arc_easy,acc_norm,0.4553872053872054,0.010218861787618732,0
|
|
boolq,acc,0.4504587155963303,0.008702022442950874,1
|
|
cb,acc,0.5357142857142857,0.06724777654937658,1
|
|
cb,f1,0.538474366304555,,1
|
|
copa,acc,0.68,0.046882617226215034,0
|
|
hellaswag,acc,0.3951404102768373,0.004878816961012042,0
|
|
hellaswag,acc_norm,0.49372634933280224,0.0049893886134388,0
|
|
piqa,acc,0.6920565832426551,0.010770892367463689,0
|
|
piqa,acc_norm,0.6985854189336235,0.01070624824275376,0
|
|
rte,acc,0.5090252707581228,0.030091559826331334,0
|
|
sciq,acc,0.728,0.014078856992462615,0
|
|
sciq,acc_norm,0.711,0.014341711358296183,0
|
|
storycloze_2016,acc,0.6531266702298236,0.011006857922124124,0
|
|
winogrande,acc,0.5343330702446725,0.014019317531542569,0
|
|
|