task,metric,value,err,version anli_r1,acc,0.325,0.014818724459095526,0 anli_r2,acc,0.303,0.014539683710535246,0 anli_r3,acc,0.3458333333333333,0.013736245342311014,0 arc_challenge,acc,0.3148464163822526,0.01357265770308495,0 arc_challenge,acc_norm,0.32764505119453924,0.013715847940719348,0 arc_easy,acc,0.6553030303030303,0.009752321586569784,0 arc_easy,acc_norm,0.6435185185185185,0.009828046544504438,0 boolq,acc,0.5474006116207951,0.008705669190431184,1 cb,acc,0.5,0.06741998624632421,1 cb,f1,0.3554421768707483,,1 copa,acc,0.77,0.04229525846816506,0 hellaswag,acc,0.462158932483569,0.0049754706908671535,0 hellaswag,acc_norm,0.6109340768771161,0.0048654194682138914,0 piqa,acc,0.7535364526659413,0.010054810789671822,0 piqa,acc_norm,0.7622415669205659,0.009932525779525492,0 rte,acc,0.5451263537906137,0.029973636495415252,0 sciq,acc,0.904,0.009320454434783219,0 sciq,acc_norm,0.901,0.009449248027662737,0 storycloze_2016,acc,0.7006948156066275,0.010590117252248798,0 winogrande,acc,0.5982636148382005,0.0137784392666495,0