task,metric,value,err,version anli_r1,acc,0.339,0.014976758771620345,0 anli_r2,acc,0.355,0.015139491543780532,0 anli_r3,acc,0.3333333333333333,0.013613950010225603,0 arc_challenge,acc,0.2901023890784983,0.01326157367752077,0 arc_challenge,acc_norm,0.3199658703071672,0.013631345807016193,0 arc_easy,acc,0.6494107744107744,0.00979100382983156,0 arc_easy,acc_norm,0.6279461279461279,0.009918187193096471,0 boolq,acc,0.5620795107033639,0.008677388652709261,1 cb,acc,0.375,0.06527912098338669,1 cb,f1,0.3383838383838384,,1 copa,acc,0.81,0.03942772444036623,0 hellaswag,acc,0.475502887870942,0.0049837889926812,0 hellaswag,acc_norm,0.6351324437363075,0.004804091708812568,0 piqa,acc,0.7524483133841132,0.010069703966857102,0 piqa,acc_norm,0.764417845484222,0.009901067586473885,0 rte,acc,0.5523465703971119,0.02993107036293953,0 sciq,acc,0.921,0.008534156773333442,0 sciq,acc_norm,0.919,0.00863212103213998,0 storycloze_2016,acc,0.7354355959380011,0.010200400541714165,0 winogrande,acc,0.6006314127861089,0.013764933546717612,0