|
task,metric,value,err,version
|
|
anli_r1,acc,0.331,0.014888272588203938,0
|
|
anli_r2,acc,0.324,0.014806864733738863,0
|
|
anli_r3,acc,0.3416666666666667,0.013696658778002519,0
|
|
arc_challenge,acc,0.3370307167235495,0.013813476652902265,0
|
|
arc_challenge,acc_norm,0.35665529010238906,0.013998056902620203,0
|
|
arc_easy,acc,0.686026936026936,0.00952324533521551,0
|
|
arc_easy,acc_norm,0.6628787878787878,0.009700146509130068,0
|
|
boolq,acc,0.6467889908256881,0.008359705247064296,1
|
|
cb,acc,0.14285714285714285,0.047184161362558305,1
|
|
cb,f1,0.1381769825918762,,1
|
|
copa,acc,0.83,0.03775251680686371,0
|
|
hellaswag,acc,0.5295757817167894,0.004981044370530809,0
|
|
hellaswag,acc_norm,0.7048396733718383,0.0045518262729780596,0
|
|
piqa,acc,0.7742110990206746,0.009754980670917315,0
|
|
piqa,acc_norm,0.7867247007616975,0.00955712122586134,0
|
|
rte,acc,0.49458483754512633,0.030094698123239966,0
|
|
sciq,acc,0.938,0.0076298239962803065,0
|
|
sciq,acc_norm,0.918,0.00868051561552373,0
|
|
storycloze_2016,acc,0.7514698022447889,0.009993659448666372,0
|
|
winogrande,acc,0.611681136543015,0.013697456658457232,0
|
|
|