lm1-2b8-55b-c4-repetitions
/
evaluation
/2b855b28bc4-results_lm-eval_global_step52452_2022-12-25-19-29-24.csv
task,metric,value,err,version | |
anli_r1,acc,0.338,0.014965960710224482,0 | |
anli_r2,acc,0.345,0.015039986742055233,0 | |
anli_r3,acc,0.32416666666666666,0.013517438120881629,0 | |
arc_challenge,acc,0.24573378839590443,0.012581033453730107,0 | |
arc_challenge,acc_norm,0.28242320819112626,0.013155456884097222,0 | |
arc_easy,acc,0.5707070707070707,0.010156678075911089,0 | |
arc_easy,acc_norm,0.5067340067340067,0.010258852980991825,0 | |
boolq,acc,0.5963302752293578,0.008581220435616821,1 | |
cb,acc,0.4107142857142857,0.06633634150359541,1 | |
cb,f1,0.2647058823529412,,1 | |
copa,acc,0.77,0.04229525846816506,0 | |
hellaswag,acc,0.4358693487353117,0.004948567856373863,0 | |
hellaswag,acc_norm,0.5608444532961562,0.0049526988022756385,0 | |
piqa,acc,0.7383025027203483,0.01025563077270823,0 | |
piqa,acc_norm,0.7421109902067464,0.010206956662056236,0 | |
rte,acc,0.5379061371841155,0.030009848912529113,0 | |
sciq,acc,0.822,0.012102167676183578,0 | |
sciq,acc_norm,0.732,0.014013292702729486,0 | |
storycloze_2016,acc,0.6926777124532336,0.010669445081866662,0 | |
winogrande,acc,0.5477505919494869,0.013988256216606008,0 | |