|
task,metric,value,err,version
|
|
anli_r1,acc,0.309,0.01461960097720649,0
|
|
anli_r2,acc,0.336,0.014944140233795018,0
|
|
anli_r3,acc,0.34833333333333333,0.013759437498874075,0
|
|
arc_challenge,acc,0.2815699658703072,0.013143376735009022,0
|
|
arc_challenge,acc_norm,0.3054607508532423,0.013460080478002498,0
|
|
arc_easy,acc,0.5753367003367004,0.010142653687480416,0
|
|
arc_easy,acc_norm,0.5513468013468014,0.010205540414612871,0
|
|
boolq,acc,0.617737003058104,0.008499149690449273,1
|
|
cb,acc,0.44642857142857145,0.06703189227942398,1
|
|
cb,f1,0.3456203829338158,,1
|
|
copa,acc,0.77,0.04229525846816506,0
|
|
hellaswag,acc,0.46036646086436966,0.004974080638364265,0
|
|
hellaswag,acc_norm,0.6097390957976498,0.004868117598481941,0
|
|
piqa,acc,0.7377584330794341,0.01026250256517245,0
|
|
piqa,acc_norm,0.7404787812840044,0.010227939888173923,0
|
|
rte,acc,0.5126353790613718,0.030086851767188564,0
|
|
sciq,acc,0.848,0.01135891830347528,0
|
|
sciq,acc_norm,0.845,0.011450157470799475,0
|
|
storycloze_2016,acc,0.692143238909674,0.010674598158758186,0
|
|
winogrande,acc,0.5627466456195738,0.013941393310695924,0
|
|
|