|
task,metric,value,err,version
|
|
anli_r1,acc,0.335,0.014933117490932579,0
|
|
anli_r2,acc,0.356,0.015149042659306621,0
|
|
anli_r3,acc,0.31833333333333336,0.013452948996996292,0
|
|
arc_challenge,acc,0.2363481228668942,0.012414960524301834,0
|
|
arc_challenge,acc_norm,0.2525597269624573,0.012696728980207706,0
|
|
arc_easy,acc,0.47769360269360267,0.010249568404555636,0
|
|
arc_easy,acc_norm,0.45454545454545453,0.010217299762709433,0
|
|
boolq,acc,0.4941896024464832,0.008744464477761504,1
|
|
cb,acc,0.4642857142857143,0.0672477765493766,1
|
|
cb,f1,0.2986425339366516,,1
|
|
copa,acc,0.64,0.048241815132442176,0
|
|
hellaswag,acc,0.37641904003186616,0.0048349694128836315,0
|
|
hellaswag,acc_norm,0.4523999203345947,0.004967118575905289,0
|
|
piqa,acc,0.6828073993471164,0.01085815545438087,0
|
|
piqa,acc_norm,0.6789989118607181,0.010892641574707906,0
|
|
rte,acc,0.5234657039711191,0.030063300411902652,0
|
|
sciq,acc,0.714,0.01429714686251791,0
|
|
sciq,acc_norm,0.696,0.01455320568795043,0
|
|
storycloze_2016,acc,0.6306787814003206,0.011160545865067166,0
|
|
winogrande,acc,0.4996053670086819,0.014052481306049516,0
|
|
|