|
task,metric,value,err,version
|
|
anli_r1,acc,0.33,0.014876872027456734,0
|
|
anli_r2,acc,0.314,0.014683991951087967,0
|
|
anli_r3,acc,0.3675,0.013923529685359282,0
|
|
arc_challenge,acc,0.3319112627986348,0.013760988200880536,0
|
|
arc_challenge,acc_norm,0.3515358361774744,0.013952413699600943,0
|
|
arc_easy,acc,0.6767676767676768,0.009597218642045324,0
|
|
arc_easy,acc_norm,0.6439393939393939,0.009825454608416304,0
|
|
boolq,acc,0.6406727828746177,0.00839181177040674,1
|
|
cb,acc,0.48214285714285715,0.0673769750864465,1
|
|
cb,f1,0.36000000000000004,,1
|
|
copa,acc,0.76,0.04292346959909283,0
|
|
hellaswag,acc,0.529874526986656,0.004980866814462756,0
|
|
hellaswag,acc_norm,0.7042421828321052,0.004554499409290722,0
|
|
piqa,acc,0.7823721436343852,0.009627407474840878,0
|
|
piqa,acc_norm,0.7861806311207835,0.009565994206915606,0
|
|
rte,acc,0.5379061371841155,0.030009848912529113,0
|
|
sciq,acc,0.919,0.008632121032139985,0
|
|
sciq,acc_norm,0.915,0.00882342636694232,0
|
|
storycloze_2016,acc,0.7514698022447889,0.009993659448666372,0
|
|
winogrande,acc,0.6385161799526441,0.013502479670791285,0
|
|
|