Muennighoff's picture
Add eval
2e29a11
raw
history blame
1.05 kB
task,metric,value,err,version
anli_r1,acc,0.323,0.01479492784334864,0
anli_r2,acc,0.302,0.014526080235459548,0
anli_r3,acc,0.3375,0.013655897185463648,0
arc_challenge,acc,0.29180887372013653,0.01328452529240351,0
arc_challenge,acc_norm,0.33532423208191126,0.013796182947785562,0
arc_easy,acc,0.6456228956228957,0.00981500403025175,0
arc_easy,acc_norm,0.6506734006734006,0.0097828534493993,0
boolq,acc,0.6223241590214067,0.008479309208281643,1
cb,acc,0.48214285714285715,0.0673769750864465,1
cb,f1,0.3081617086193746,,1
copa,acc,0.73,0.044619604333847394,0
hellaswag,acc,0.45269866560446126,0.004967402792744857,0
hellaswag,acc_norm,0.601274646484764,0.004886353563571844,0
piqa,acc,0.7388465723612623,0.010248738649935581,0
piqa,acc_norm,0.7459194776931447,0.010157271999135055,0
rte,acc,0.5126353790613718,0.030086851767188564,0
sciq,acc,0.931,0.00801893405031515,0
sciq,acc_norm,0.936,0.007743640226919298,0
storycloze_2016,acc,0.7097808658471406,0.010495529690730063,0
winogrande,acc,0.569060773480663,0.01391779662333596,0