lm1-misc-pile/619m22b22b/evaluation/rankeval/lm1-619m-22b-results_lm-eval_global_step41007_2023-01-24-17-05-33_5shots.csv
task,metric,value,err,version
anli_r1,acc,0.313,0.014671272822977892,0
anli_r2,acc,0.324,0.014806864733738856,0
anli_r3,acc,0.3383333333333333,0.01366414400661827,0
arc_challenge,acc,0.21245733788395904,0.011953482906582954,0
arc_challenge,acc_norm,0.2440273037542662,0.012551447627856253,0
arc_easy,acc,0.4983164983164983,0.010259725364582795,0
arc_easy,acc_norm,0.47685185185185186,0.010248782484554474,0
boolq,acc,0.518960244648318,0.008738765179491936,1
cb,acc,0.39285714285714285,0.0658538889806635,1
cb,f1,0.2593406593406593,,1
copa,acc,0.68,0.04688261722621505,0
hellaswag,acc,0.3018323043218482,0.0045811472479631975,0
hellaswag,acc_norm,0.33608842859988053,0.004714041652598617,0
piqa,acc,0.6458106637649619,0.011158755672626112,0
piqa,acc_norm,0.6474428726877041,0.011147074365010456,0
rte,acc,0.5523465703971119,0.02993107036293953,0
sciq,acc,0.852,0.011234866364235237,0
sciq,acc_norm,0.832,0.01182860583145427,0
storycloze_2016,acc,0.5916622127204704,0.011366477562142522,0
winogrande,acc,0.510655090765588,0.014049294536290396,0