task,metric,value,err,version anli_r1,acc,0.29,0.014356395999905689,0 anli_r2,acc,0.365,0.015231776226264903,0 anli_r3,acc,0.3333333333333333,0.013613950010225603,0 arc_challenge,acc,0.21843003412969283,0.01207429160570098,0 arc_challenge,acc_norm,0.25597269624573377,0.012753013241244518,0 arc_easy,acc,0.5370370370370371,0.010231597249131051,0 arc_easy,acc_norm,0.5172558922558923,0.010253671674754631,0 boolq,acc,0.5663608562691131,0.008667690464344683,1 cb,acc,0.6071428571428571,0.0658538889806635,1 cb,f1,0.4062342885872297,,1 copa,acc,0.73,0.044619604333847394,0 hellaswag,acc,0.36168094005178253,0.004795051037917719,0 hellaswag,acc_norm,0.45210117506472813,0.004966832553245038,0 piqa,acc,0.704570184983678,0.010644731559342464,0 piqa,acc_norm,0.705658324265506,0.010633311470347519,0 rte,acc,0.5379061371841155,0.030009848912529117,0 sciq,acc,0.889,0.009938701010583726,0 sciq,acc_norm,0.875,0.010463483381956722,0 storycloze_2016,acc,0.6344200962052379,0.011136758947688388,0 winogrande,acc,0.5122336227308603,0.01404827882040562,0