lm1-misc-pile / 619m22b22b /evaluation /rankeval /lm1-619m-22b-results_lm-eval_global_step41007_2023-01-24-17-05-33_5shots.csv
Muennighoff's picture
Add
8393ff0
task,metric,value,err,version
anli_r1,acc,0.313,0.014671272822977892,0
anli_r2,acc,0.324,0.014806864733738856,0
anli_r3,acc,0.3383333333333333,0.01366414400661827,0
arc_challenge,acc,0.21245733788395904,0.011953482906582954,0
arc_challenge,acc_norm,0.2440273037542662,0.012551447627856253,0
arc_easy,acc,0.4983164983164983,0.010259725364582795,0
arc_easy,acc_norm,0.47685185185185186,0.010248782484554474,0
boolq,acc,0.518960244648318,0.008738765179491936,1
cb,acc,0.39285714285714285,0.0658538889806635,1
cb,f1,0.2593406593406593,,1
copa,acc,0.68,0.04688261722621505,0
hellaswag,acc,0.3018323043218482,0.0045811472479631975,0
hellaswag,acc_norm,0.33608842859988053,0.004714041652598617,0
piqa,acc,0.6458106637649619,0.011158755672626112,0
piqa,acc_norm,0.6474428726877041,0.011147074365010456,0
rte,acc,0.5523465703971119,0.02993107036293953,0
sciq,acc,0.852,0.011234866364235237,0
sciq,acc_norm,0.832,0.01182860583145427,0
storycloze_2016,acc,0.5916622127204704,0.011366477562142522,0
winogrande,acc,0.510655090765588,0.014049294536290396,0