|
task,metric,value,err,version
|
|
anli_r1,acc,0.33,0.014876872027456732,0
|
|
anli_r2,acc,0.346,0.015050266127564443,0
|
|
anli_r3,acc,0.3258333333333333,0.01353542204341746,0
|
|
arc_challenge,acc,0.3148464163822526,0.01357265770308495,0
|
|
arc_challenge,acc_norm,0.3225255972696246,0.013659980894277366,0
|
|
arc_easy,acc,0.6414141414141414,0.009840882301225297,0
|
|
arc_easy,acc_norm,0.6136363636363636,0.009991296778159619,0
|
|
boolq,acc,0.6256880733944954,0.008464246656443233,1
|
|
cb,acc,0.4642857142857143,0.06724777654937658,1
|
|
cb,f1,0.38268797942216715,,1
|
|
copa,acc,0.79,0.040936018074033256,0
|
|
hellaswag,acc,0.4753037243576977,0.0049836910991109115,0
|
|
hellaswag,acc_norm,0.6382194781915953,0.004795337009118188,0
|
|
piqa,acc,0.7584330794341676,0.009986718001804467,0
|
|
piqa,acc_norm,0.7633297062023939,0.009916841655042809,0
|
|
rte,acc,0.5487364620938628,0.029953149241808946,0
|
|
sciq,acc,0.923,0.008434580140240643,0
|
|
sciq,acc_norm,0.915,0.008823426366942314,0
|
|
storycloze_2016,acc,0.7161945483698557,0.010425696279730922,0
|
|
winogrande,acc,0.5974743488555643,0.013782866831703048,0
|
|
|