|
task,metric,value,err,version
|
|
anli_r1,acc,0.339,0.014976758771620339,0
|
|
anli_r2,acc,0.342,0.015008706182121728,0
|
|
anli_r3,acc,0.37083333333333335,0.01394962856014431,0
|
|
arc_challenge,acc,0.2687713310580205,0.01295506596371069,0
|
|
arc_challenge,acc_norm,0.2696245733788396,0.012968040686869147,0
|
|
arc_easy,acc,0.5791245791245792,0.010130502164066342,0
|
|
arc_easy,acc_norm,0.5357744107744108,0.010233488709726539,0
|
|
boolq,acc,0.6103975535168196,0.008529228894936293,1
|
|
cb,acc,0.42857142857142855,0.06672848092813058,1
|
|
cb,f1,0.30272108843537415,,1
|
|
copa,acc,0.73,0.0446196043338474,0
|
|
hellaswag,acc,0.4326827325234017,0.004944351065545858,0
|
|
hellaswag,acc_norm,0.5660227046405099,0.004946089230153027,0
|
|
piqa,acc,0.7366702937976061,0.010276185322196764,0
|
|
piqa,acc_norm,0.7383025027203483,0.010255630772708227,0
|
|
rte,acc,0.5415162454873647,0.02999253538537331,0
|
|
sciq,acc,0.867,0.010743669132397332,0
|
|
sciq,acc_norm,0.815,0.012285191326386686,0
|
|
storycloze_2016,acc,0.6862640299305185,0.010730179119317625,0
|
|
winogrande,acc,0.5469613259668509,0.01399036663214809,0
|
|
|