task,metric,value,err,version anli_r1,acc,0.309,0.014619600977206494,0 anli_r2,acc,0.332,0.014899597242811482,0 anli_r3,acc,0.32083333333333336,0.013480882752851557,0 arc_challenge,acc,0.23122866894197952,0.012320858834772274,0 arc_challenge,acc_norm,0.2508532423208191,0.012668198621315433,0 arc_easy,acc,0.5311447811447811,0.010239860250021748,0 arc_easy,acc_norm,0.5214646464646465,0.010250325159456656,0 boolq,acc,0.5486238532110091,0.00870360524342519,1 cb,acc,0.5,0.06741998624632421,1 cb,f1,0.34848484848484845,,1 copa,acc,0.66,0.04760952285695237,0 hellaswag,acc,0.3626767576180044,0.004797900720081492,0 hellaswag,acc_norm,0.4462258514240191,0.004960839986099517,0 piqa,acc,0.6980413492927094,0.01071173289158836,0 piqa,acc_norm,0.690968443960827,0.010781419464406979,0 rte,acc,0.48375451263537905,0.030080573208738064,0 sciq,acc,0.886,0.010055103435823328,0 sciq,acc_norm,0.877,0.010391293421849877,0 storycloze_2016,acc,0.6317477284874399,0.011153823258531747,0 winogrande,acc,0.5303867403314917,0.014026510839428751,0