|
task,metric,value,err,version
|
|
anli_r1,acc,0.317,0.014721675438880224,0
|
|
anli_r2,acc,0.366,0.015240612726405756,0
|
|
anli_r3,acc,0.32083333333333336,0.013480882752851553,0
|
|
arc_challenge,acc,0.23720136518771331,0.012430399829260851,0
|
|
arc_challenge,acc_norm,0.2696245733788396,0.01296804068686916,0
|
|
arc_easy,acc,0.5349326599326599,0.010234713052723667,0
|
|
arc_easy,acc_norm,0.5193602693602694,0.01025208949116552,0
|
|
boolq,acc,0.5155963302752293,0.008740799550176545,1
|
|
cb,acc,0.5,0.06741998624632421,1
|
|
cb,f1,0.3491841491841492,,1
|
|
copa,acc,0.7,0.046056618647183814,0
|
|
hellaswag,acc,0.36427006572395937,0.004802413919932656,0
|
|
hellaswag,acc_norm,0.44503087034455285,0.004959535443170619,0
|
|
piqa,acc,0.6936887921653971,0.01075497003236732,0
|
|
piqa,acc_norm,0.7007616974972797,0.010684130673134581,0
|
|
rte,acc,0.516245487364621,0.030080573208738064,0
|
|
sciq,acc,0.899,0.00953361892934099,0
|
|
sciq,acc_norm,0.887,0.010016552866696865,0
|
|
storycloze_2016,acc,0.6435061464457509,0.011075964871051003,0
|
|
winogrande,acc,0.5098658247829518,0.014049749833367592,0
|
|
|