|
task,metric,value,err,version
|
|
anli_r1,acc,0.334,0.014922019523732968,0
|
|
anli_r2,acc,0.339,0.014976758771620344,0
|
|
anli_r3,acc,0.3308333333333333,0.013588208070709006,0
|
|
arc_challenge,acc,0.35494880546075086,0.013983036904094099,0
|
|
arc_challenge,acc_norm,0.36689419795221845,0.014084133118104298,0
|
|
arc_easy,acc,0.6952861952861953,0.009444871667360213,0
|
|
arc_easy,acc_norm,0.6797138047138047,0.009574152668739419,0
|
|
boolq,acc,0.6165137614678899,0.008504304838837023,1
|
|
cb,acc,0.35714285714285715,0.06460957383809221,1
|
|
cb,f1,0.182648401826484,,1
|
|
copa,acc,0.87,0.03379976689896309,0
|
|
hellaswag,acc,0.5327623979286995,0.004979058078478698,0
|
|
hellaswag,acc_norm,0.7144991037641903,0.004507296196227816,0
|
|
piqa,acc,0.7774755168661589,0.009704600975718238,0
|
|
piqa,acc_norm,0.79379760609358,0.009439460331609514,0
|
|
rte,acc,0.5667870036101083,0.029826764082138274,0
|
|
sciq,acc,0.942,0.007395315455792942,0
|
|
sciq,acc_norm,0.934,0.007855297938697596,0
|
|
storycloze_2016,acc,0.7696419027258151,0.009737002698356936,0
|
|
winogrande,acc,0.6345698500394633,0.013533965097638793,0
|
|
|