lm1-misc-pile/619m22b22b/evaluation/lm1-619m-22b-results_lm-eval_global_step41007_2023-01-24-13-53-29_2shots.csv
task,metric,value,err,version
anli_r1,acc,0.303,0.014539683710535259,0
anli_r2,acc,0.335,0.014933117490932572,0
anli_r3,acc,0.3408333333333333,0.013688600793296936,0
arc_challenge,acc,0.22696245733788395,0.012240491536132866,0
arc_challenge,acc_norm,0.26023890784982934,0.01282193022511256,0
arc_easy,acc,0.492003367003367,0.010258471289841975,0
arc_easy,acc_norm,0.4642255892255892,0.010233488709726549,0
boolq,acc,0.5342507645259938,0.008724512941821092,1
cb,acc,0.4642857142857143,0.06724777654937658,1
cb,f1,0.3162578162578163,,1
copa,acc,0.68,0.04688261722621505,0
hellaswag,acc,0.302230631348337,0.004582861219020889,0
hellaswag,acc_norm,0.3355905198167696,0.004712314511950981,0
piqa,acc,0.6485310119695321,0.011139207691931191,0
piqa,acc_norm,0.6485310119695321,0.011139207691931193,0
rte,acc,0.4729241877256318,0.0300523034631437,0
sciq,acc,0.843,0.0115101469792302,0
sciq,acc_norm,0.825,0.012021627157731968,0
storycloze_2016,acc,0.5905932656333511,0.01137105952719707,0
winogrande,acc,0.5169692186266772,0.014044390401612974,0