task,metric,value,err,version anli_r1,acc,0.338,0.014965960710224473,0 anli_r2,acc,0.333,0.01491084616422986,0 anli_r3,acc,0.335,0.01363087184382148,0 arc_challenge,acc,0.181740614334471,0.011269198948880236,0 arc_challenge,acc_norm,0.22098976109215018,0.012124929206818258,0 arc_easy,acc,0.43434343434343436,0.010170943451269425,0 arc_easy,acc_norm,0.382996632996633,0.009974920384536472,0 boolq,acc,0.5935779816513761,0.008590531708882188,1 cb,acc,0.4107142857142857,0.0663363415035954,1 cb,f1,0.1940928270042194,,1 copa,acc,0.63,0.048523658709391,0 hellaswag,acc,0.29725154351722766,0.004561141293448468,0 hellaswag,acc_norm,0.3256323441545509,0.004676529200753,0 piqa,acc,0.6300326441784548,0.011264415223415281,0 piqa,acc_norm,0.6322089227421109,0.011250616646678792,0 rte,acc,0.5234657039711191,0.03006330041190266,0 sciq,acc,0.736,0.013946271849440472,0 sciq,acc_norm,0.668,0.014899597242811476,0 storycloze_2016,acc,0.5916622127204704,0.011366477562142522,0 winogrande,acc,0.5090765588003157,0.01405017009449771,0