|
task,metric,value,err,version
|
|
anli_r1,acc,0.323,0.014794927843348637,0
|
|
anli_r2,acc,0.325,0.014818724459095527,0
|
|
anli_r3,acc,0.33666666666666667,0.013647602942406389,0
|
|
arc_challenge,acc,0.2713310580204778,0.012993807727545801,0
|
|
arc_challenge,acc_norm,0.3054607508532423,0.01346008047800251,0
|
|
arc_easy,acc,0.6321548821548821,0.009894923464455196,0
|
|
arc_easy,acc_norm,0.5795454545454546,0.010129114278546524,0
|
|
boolq,acc,0.6235474006116208,0.008473882279194588,1
|
|
cb,acc,0.4642857142857143,0.06724777654937658,1
|
|
cb,f1,0.32592592592592595,,1
|
|
copa,acc,0.74,0.04408440022768077,0
|
|
hellaswag,acc,0.4780920135431189,0.004984989320648131,0
|
|
hellaswag,acc_norm,0.6283608842859988,0.004822550638450904,0
|
|
piqa,acc,0.7546245919477693,0.0100398313204224,0
|
|
piqa,acc_norm,0.7622415669205659,0.009932525779525489,0
|
|
rte,acc,0.5487364620938628,0.029953149241808946,0
|
|
sciq,acc,0.905,0.009276910103103326,0
|
|
sciq,acc_norm,0.873,0.010534798620855759,0
|
|
storycloze_2016,acc,0.7135221806520577,0.01045510591863303,0
|
|
winogrande,acc,0.5895816890292028,0.013825107120035863,0
|
|
|