|
task,metric,value,err,version
|
|
anli_r1,acc,0.32,0.01475865230357487,0
|
|
anli_r2,acc,0.335,0.014933117490932566,0
|
|
anli_r3,acc,0.33916666666666667,0.013672343491681812,0
|
|
arc_challenge,acc,0.3430034129692833,0.013872423223718173,0
|
|
arc_challenge,acc_norm,0.34982935153583616,0.013936809212158284,0
|
|
arc_easy,acc,0.672979797979798,0.009626235849372207,0
|
|
arc_easy,acc_norm,0.6553030303030303,0.009752321586569784,0
|
|
boolq,acc,0.6464831804281346,0.008361346005339394,1
|
|
cb,acc,0.375,0.06527912098338669,1
|
|
cb,f1,0.28595317725752506,,1
|
|
copa,acc,0.89,0.03144660377352203,0
|
|
hellaswag,acc,0.5285799641505676,0.004981623292196192,0
|
|
hellaswag,acc_norm,0.7057359091814379,0.00454779896412668,0
|
|
piqa,acc,0.7774755168661589,0.009704600975718245,0
|
|
piqa,acc_norm,0.7861806311207835,0.009565994206915606,0
|
|
rte,acc,0.5451263537906137,0.029973636495415255,0
|
|
sciq,acc,0.929,0.008125578442487923,0
|
|
sciq,acc_norm,0.923,0.008434580140240644,0
|
|
storycloze_2016,acc,0.757883484767504,0.009905870033193868,0
|
|
winogrande,acc,0.6527229676400947,0.013380909249751242,0
|
|
|