task,metric,value,err,version anli_r1,acc,0.328,0.01485384248727033,0 anli_r2,acc,0.328,0.014853842487270333,0 anli_r3,acc,0.325,0.013526454480351025,0 arc_challenge,acc,0.25170648464163825,0.012682496334042961,0 arc_challenge,acc_norm,0.28924914675767915,0.013250012579393443,0 arc_easy,acc,0.5618686868686869,0.010180937100600076,0 arc_easy,acc_norm,0.5441919191919192,0.010219631763437851,0 boolq,acc,0.4828746177370031,0.008739923994130054,1 cb,acc,0.4107142857142857,0.0663363415035954,1 cb,f1,0.29069767441860467,,1 copa,acc,0.74,0.0440844002276808,0 hellaswag,acc,0.351822346146186,0.004765629263643526,0 hellaswag,acc_norm,0.43537143995220073,0.004947922692688831,0 piqa,acc,0.6822633297062024,0.010863133246569285,0 piqa,acc_norm,0.6806311207834603,0.010877964076613742,0 rte,acc,0.49458483754512633,0.030094698123239966,0 sciq,acc,0.902,0.009406619184621224,0 sciq,acc_norm,0.901,0.009449248027662732,0 storycloze_2016,acc,0.6424371993586317,0.01108334116882779,0 winogrande,acc,0.5438042620363063,0.013998453610924324,0