task,metric,value,err,version anli_r1,acc,0.365,0.015231776226264891,0 anli_r2,acc,0.357,0.015158521721486778,0 anli_r3,acc,0.3458333333333333,0.013736245342311014,0 arc_challenge,acc,0.2525597269624573,0.012696728980207704,0 arc_challenge,acc_norm,0.29436860068259385,0.013318528460539422,0 arc_easy,acc,0.5925925925925926,0.010082326627832861,0 arc_easy,acc_norm,0.5664983164983165,0.010168640625454101,0 boolq,acc,0.5761467889908257,0.008643046537505769,1 cb,acc,0.5357142857142857,0.06724777654937658,1 cb,f1,0.3502178649237473,,1 copa,acc,0.74,0.0440844002276808,0 hellaswag,acc,0.442441744672376,0.0049566093272184015,0 hellaswag,acc_norm,0.5873332005576578,0.004913076844433741,0 piqa,acc,0.7366702937976061,0.010276185322196764,0 piqa,acc_norm,0.7448313384113167,0.01017157159252183,0 rte,acc,0.48736462093862815,0.030086851767188564,0 sciq,acc,0.868,0.010709373963528031,0 sciq,acc_norm,0.838,0.01165726777130442,0 storycloze_2016,acc,0.694815606627472,0.010648664383985668,0 winogrande,acc,0.5469613259668509,0.013990366632148104,0