lm1-2b8-55b-realtasky
/
evaluation
/lm1-2b8-55b-realtasky-results_lm-eval_global_step52452_2022-12-28-09-22-14.csv
task,metric,value,err,version | |
anli_r1,acc,0.286,0.014297146862517908,0 | |
anli_r2,acc,0.304,0.01455320568795044,0 | |
anli_r3,acc,0.3175,0.013443538681348054,0 | |
arc_challenge,acc,0.26535836177474403,0.012902554762313969,0 | |
arc_challenge,acc_norm,0.2909556313993174,0.013273077865907573,0 | |
arc_easy,acc,0.5122053872053872,0.010256726235129016,0 | |
arc_easy,acc_norm,0.4877946127946128,0.01025672623512901,0 | |
boolq,acc,0.6862385321100918,0.008115773046958279,1 | |
cb,acc,0.35714285714285715,0.06460957383809221,1 | |
cb,f1,0.27666815942678014,,1 | |
copa,acc,0.69,0.04648231987117316,0 | |
hellaswag,acc,0.4907388966341366,0.004988925410522774,0 | |
hellaswag,acc_norm,0.5834495120493925,0.00491979470467327,0 | |
piqa,acc,0.6605005440696409,0.011048455047173918,0 | |
piqa,acc_norm,0.6534276387377584,0.011103020320872166,0 | |
rte,acc,0.5631768953068592,0.029855247390314945,0 | |
sciq,acc,0.905,0.009276910103103324,0 | |
sciq,acc_norm,0.872,0.010570133761108658,0 | |
storycloze_2016,acc,0.5879208979155531,0.011382271506935862,0 | |
winogrande,acc,0.5177584846093133,0.014043619596174966,0 | |