task,metric,value,err,version anli_r1,acc,0.323,0.014794927843348635,0 anli_r2,acc,0.332,0.014899597242811482,0 anli_r3,acc,0.33,0.013579531277800923,0 arc_challenge,acc,0.2235494880546075,0.012174896631202605,0 arc_challenge,acc_norm,0.2525597269624573,0.012696728980207706,0 arc_easy,acc,0.5294612794612794,0.010241957728409686,0 arc_easy,acc_norm,0.476010101010101,0.010247967392742691,0 boolq,acc,0.5788990825688073,0.008635491562221343,1 cb,acc,0.25,0.058387420812114225,1 cb,f1,0.2191358024691358,,1 copa,acc,0.69,0.04648231987117316,0 hellaswag,acc,0.3650667197769369,0.004804649197163699,0 hellaswag,acc_norm,0.4427404899422426,0.004956953917781311,0 piqa,acc,0.7034820457018498,0.010656078922661153,0 piqa,acc_norm,0.7067464635473341,0.010621818421101931,0 rte,acc,0.5451263537906137,0.029973636495415252,0 sciq,acc,0.811,0.012386784588117707,0 sciq,acc_norm,0.726,0.014111099288259587,0 storycloze_2016,acc,0.6467129877071085,0.011053474766125627,0 winogrande,acc,0.5185477505919495,0.014042813708888378,0