|
task,metric,value,err,version
|
|
anli_r1,acc,0.311,0.014645596385722695,0
|
|
anli_r2,acc,0.313,0.014671272822977886,0
|
|
anli_r3,acc,0.33166666666666667,0.013596836729485156,0
|
|
arc_challenge,acc,0.2568259385665529,0.0127669237941168,0
|
|
arc_challenge,acc_norm,0.30119453924914674,0.01340674176784762,0
|
|
arc_easy,acc,0.5555555555555556,0.01019625483869168,0
|
|
arc_easy,acc_norm,0.5366161616161617,0.01023223506393303,0
|
|
boolq,acc,0.6061162079510704,0.008545835792614982,1
|
|
cb,acc,0.3392857142857143,0.06384226561930828,1
|
|
cb,f1,0.23827865281885505,,1
|
|
copa,acc,0.8,0.04020151261036845,0
|
|
hellaswag,acc,0.46036646086436966,0.0049740806383642665,0
|
|
hellaswag,acc_norm,0.6048595897231627,0.00487881696101204,0
|
|
piqa,acc,0.719804134929271,0.010478122015577082,0
|
|
piqa,acc_norm,0.7181719260065288,0.010496675231258159,0
|
|
rte,acc,0.4981949458483754,0.030096267148976633,0
|
|
sciq,acc,0.852,0.011234866364235239,0
|
|
sciq,acc_norm,0.834,0.011772110370812185,0
|
|
storycloze_2016,acc,0.6755745590593266,0.01082613134499089,0
|
|
winogrande,acc,0.5580110497237569,0.013957584079109001,0
|
|
|