|
task,metric,value,err,version
|
|
anli_r1,acc,0.35,0.015090650341444235,0
|
|
anli_r2,acc,0.339,0.014976758771620339,0
|
|
anli_r3,acc,0.3566666666666667,0.013833742805050713,0
|
|
arc_challenge,acc,0.2440273037542662,0.012551447627856259,0
|
|
arc_challenge,acc_norm,0.2883959044368601,0.013238394422428175,0
|
|
arc_easy,acc,0.5917508417508418,0.01008556619579125,0
|
|
arc_easy,acc_norm,0.5340909090909091,0.010235908103438688,0
|
|
boolq,acc,0.5651376146788991,0.008670528471841557,1
|
|
cb,acc,0.375,0.06527912098338669,1
|
|
cb,f1,0.2631578947368421,,1
|
|
copa,acc,0.76,0.04292346959909283,0
|
|
hellaswag,acc,0.4500099581756622,0.004964779805180658,0
|
|
hellaswag,acc_norm,0.5825532762397929,0.00492130033128556,0
|
|
piqa,acc,0.7366702937976061,0.010276185322196764,0
|
|
piqa,acc_norm,0.7464635473340587,0.010150090834551794,0
|
|
rte,acc,0.5126353790613718,0.030086851767188564,0
|
|
sciq,acc,0.857,0.01107581480856704,0
|
|
sciq,acc_norm,0.769,0.013334797216936442,0
|
|
storycloze_2016,acc,0.7006948156066275,0.010590117252248801,0
|
|
winogrande,acc,0.5509076558800315,0.013979459389140844,0
|
|
|