lm1-misc-pile/1b58b88b8/1b58b88b8pile/evaluation/lm1-1b5-8b8-results_lm-eval_global_step16765_2023-01-24-13-53-33_2shots.csv
task,metric,value,err,version
anli_r1,acc,0.307,0.01459328489285262,0
anli_r2,acc,0.336,0.014944140233795027,0
anli_r3,acc,0.3308333333333333,0.013588208070709,0
arc_challenge,acc,0.20136518771331058,0.011718927477444272,0
arc_challenge,acc_norm,0.2440273037542662,0.012551447627856253,0
arc_easy,acc,0.45454545454545453,0.010217299762709428,0
arc_easy,acc_norm,0.4305555555555556,0.010160345396860075,0
boolq,acc,0.5617737003058104,0.008678056241208772,1
cb,acc,0.44642857142857145,0.06703189227942398,1
cb,f1,0.2809560132214594,,1
copa,acc,0.66,0.04760952285695237,0
hellaswag,acc,0.28540131447918743,0.0045068240943332985,0
hellaswag,acc_norm,0.30790679147580163,0.004606843344517482,0
piqa,acc,0.6077257889009793,0.011391846744072232,0
piqa,acc_norm,0.6169749727965179,0.011342081709082845,0
rte,acc,0.516245487364621,0.030080573208738064,0
sciq,acc,0.798,0.012702651587655139,0
sciq,acc_norm,0.766,0.013394902889660009,0
storycloze_2016,acc,0.5660074826296099,0.011461234645182191,0
winogrande,acc,0.5043409629044988,0.014051956064076892,0