lm1-misc-pile / 1b121b21b / evaluation / lm1-1b1-21b-oscar-results_lm-eval_global_step52452_2022-12-28-10-34-30.csv
task,metric,value,err,version
anli_r1,acc,0.286,0.014297146862517908,0
anli_r2,acc,0.304,0.01455320568795044,0
anli_r3,acc,0.3175,0.013443538681348054,0
arc_challenge,acc,0.26535836177474403,0.012902554762313969,0
arc_challenge,acc_norm,0.2909556313993174,0.013273077865907573,0
arc_easy,acc,0.5122053872053872,0.010256726235129016,0
arc_easy,acc_norm,0.4877946127946128,0.01025672623512901,0
boolq,acc,0.6862385321100918,0.008115773046958279,1
cb,acc,0.35714285714285715,0.06460957383809221,1
cb,f1,0.27666815942678014,,1
copa,acc,0.69,0.04648231987117316,0
hellaswag,acc,0.4907388966341366,0.004988925410522774,0
hellaswag,acc_norm,0.5834495120493925,0.00491979470467327,0
piqa,acc,0.6605005440696409,0.011048455047173918,0
piqa,acc_norm,0.6534276387377584,0.011103020320872166,0
rte,acc,0.5631768953068592,0.029855247390314945,0
sciq,acc,0.905,0.009276910103103324,0
sciq,acc_norm,0.872,0.010570133761108658,0
storycloze_2016,acc,0.5879208979155531,0.011382271506935862,0
winogrande,acc,0.5177584846093133,0.014043619596174966,0