|
task,metric,value,err,version
|
|
anli_r1,acc,0.332,0.014899597242811475,0
|
|
anli_r2,acc,0.333,0.014910846164229864,0
|
|
anli_r3,acc,0.34,0.013680495725767797,0
|
|
arc_challenge,acc,0.295221843003413,0.01332975029338232,0
|
|
arc_challenge,acc_norm,0.30802047781569963,0.013491429517292038,0
|
|
arc_easy,acc,0.6195286195286195,0.009962305992058577,0
|
|
arc_easy,acc_norm,0.5959595959595959,0.01006906164954955,0
|
|
boolq,acc,0.6180428134556575,0.00849785199842719,1
|
|
cb,acc,0.35714285714285715,0.06460957383809218,1
|
|
cb,f1,0.2528248587570622,,1
|
|
copa,acc,0.77,0.04229525846816506,0
|
|
hellaswag,acc,0.46932881896036643,0.004980384575535383,0
|
|
hellaswag,acc_norm,0.6172077275443139,0.0048507486878599185,0
|
|
piqa,acc,0.7464635473340587,0.010150090834551794,0
|
|
piqa,acc_norm,0.7573449401523396,0.010002002569708688,0
|
|
rte,acc,0.4693140794223827,0.03003973059219781,0
|
|
sciq,acc,0.896,0.009658016218524301,0
|
|
sciq,acc_norm,0.871,0.010605256784796586,0
|
|
storycloze_2016,acc,0.7183324425440941,0.010401844358587665,0
|
|
winogrande,acc,0.5927387529597474,0.013808654122417862,0
|
|
|