|
task,metric,value,err,version
|
|
anli_r1,acc,0.328,0.014853842487270334,0
|
|
anli_r2,acc,0.352,0.015110404505648668,0
|
|
anli_r3,acc,0.34,0.013680495725767789,0
|
|
arc_challenge,acc,0.22696245733788395,0.01224049153613287,0
|
|
arc_challenge,acc_norm,0.2525597269624573,0.012696728980207704,0
|
|
arc_easy,acc,0.48063973063973064,0.01025208949116552,0
|
|
arc_easy,acc_norm,0.46675084175084175,0.010237073872130747,0
|
|
boolq,acc,0.43241590214067277,0.008664798701065797,1
|
|
cb,acc,0.39285714285714285,0.0658538889806635,1
|
|
cb,f1,0.23650793650793656,,1
|
|
copa,acc,0.67,0.04725815626252609,0
|
|
hellaswag,acc,0.3972316271659032,0.00488324657949666,0
|
|
hellaswag,acc_norm,0.4969129655447122,0.004989686307484565,0
|
|
piqa,acc,0.690424374319913,0.010786656752183345,0
|
|
piqa,acc_norm,0.6958650707290533,0.010733493335721314,0
|
|
rte,acc,0.51985559566787,0.030072723167317177,0
|
|
sciq,acc,0.746,0.013772206565168544,0
|
|
sciq,acc_norm,0.722,0.014174516461485256,0
|
|
storycloze_2016,acc,0.6515232495991449,0.011018717784788488,0
|
|
winogrande,acc,0.5059194948697711,0.014051500838485807,0
|
|
|