task,metric,value,err,version anli_r1,acc,0.317,0.01472167543888022,0 anli_r2,acc,0.314,0.014683991951087966,0 anli_r3,acc,0.355,0.013819249004047296,0 arc_challenge,acc,0.2883959044368601,0.013238394422428173,0 arc_challenge,acc_norm,0.3165529010238908,0.01359243151906808,0 arc_easy,acc,0.6241582491582491,0.00993843637317063,0 arc_easy,acc_norm,0.5833333333333334,0.010116282977781263,0 boolq,acc,0.6030581039755352,0.00855727696467513,1 cb,acc,0.30357142857142855,0.06199938655510754,1 cb,f1,0.20076628352490422,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.477096195976897,0.004984543540932339,0 hellaswag,acc_norm,0.6249751045608445,0.0048313992185002475,0 piqa,acc,0.7595212187159956,0.009971345364651073,0 piqa,acc_norm,0.750272034820457,0.010099232969867469,0 rte,acc,0.5126353790613718,0.030086851767188564,0 sciq,acc,0.901,0.00944924802766276,0 sciq,acc_norm,0.874,0.010499249222408035,0 storycloze_2016,acc,0.7108498129342598,0.010484068799942077,0 winogrande,acc,0.6006314127861089,0.013764933546717614,0