task,metric,value,err,version anli_r1,acc,0.325,0.014818724459095522,0 anli_r2,acc,0.312,0.014658474370509017,0 anli_r3,acc,0.3541666666666667,0.013811933499570961,0 arc_challenge,acc,0.3097269624573379,0.01351205841523836,0 arc_challenge,acc_norm,0.3148464163822526,0.013572657703084948,0 arc_easy,acc,0.6452020202020202,0.009817629113069696,0 arc_easy,acc_norm,0.6494107744107744,0.009791003829831557,0 boolq,acc,0.5954128440366973,0.008584355308932687,1 cb,acc,0.42857142857142855,0.06672848092813058,1 cb,f1,0.299808429118774,,1 copa,acc,0.76,0.04292346959909282,0 hellaswag,acc,0.45518820952001593,0.004969701081068372,0 hellaswag,acc_norm,0.5997809201354312,0.004889413126208782,0 piqa,acc,0.733949945593036,0.010310039263352831,0 piqa,acc_norm,0.7383025027203483,0.010255630772708232,0 rte,acc,0.4981949458483754,0.030096267148976633,0 sciq,acc,0.915,0.00882342636694233,0 sciq,acc_norm,0.917,0.008728527206074792,0 storycloze_2016,acc,0.711918760021379,0.010472537019822582,0 winogrande,acc,0.5761641673243884,0.01388849238994452,0