{
"results": {
"anli_r1": {
"acc": 0.313,
"acc_stderr": 0.014671272822977892
},
"anli_r2": {
"acc": 0.324,
"acc_stderr": 0.014806864733738856
},
"anli_r3": {
"acc": 0.3383333333333333,
"acc_stderr": 0.01366414400661827
},
"cb": {
"acc": 0.39285714285714285,
"acc_stderr": 0.0658538889806635,
"f1": 0.2593406593406593
},
"copa": {
"acc": 0.68,
"acc_stderr": 0.04688261722621505
},
"hellaswag": {
"acc": 0.3018323043218482,
"acc_stderr": 0.0045811472479631975,
"acc_norm": 0.33608842859988053,
"acc_norm_stderr": 0.004714041652598617
},
"rte": {
"acc": 0.5523465703971119,
"acc_stderr": 0.02993107036293953
},
"winogrande": {
"acc": 0.510655090765588,
"acc_stderr": 0.014049294536290396
},
"storycloze_2016": {
"acc": 0.5916622127204704,
"acc_stderr": 0.011366477562142522
},
"boolq": {
"acc": 0.518960244648318,
"acc_stderr": 0.008738765179491936
},
"arc_easy": {
"acc": 0.4983164983164983,
"acc_stderr": 0.010259725364582795,
"acc_norm": 0.47685185185185186,
"acc_norm_stderr": 0.010248782484554474
},
"arc_challenge": {
"acc": 0.21245733788395904,
"acc_stderr": 0.011953482906582954,
"acc_norm": 0.2440273037542662,
"acc_norm_stderr": 0.012551447627856253
},
"sciq": {
"acc": 0.852,
"acc_stderr": 0.011234866364235237,
"acc_norm": 0.832,
"acc_norm_stderr": 0.01182860583145427
},
"piqa": {
"acc": 0.6458106637649619,
"acc_stderr": 0.011158755672626112,
"acc_norm": 0.6474428726877041,
"acc_norm_stderr": 0.011147074365010456
}
},
"versions": {
"anli_r1": 0,
"anli_r2": 0,
"anli_r3": 0,
"cb": 1,
"copa": 0,
"hellaswag": 0,
"rte": 0,
"winogrande": 0,
"storycloze_2016": 0,
"boolq": 1,
"arc_easy": 0,
"arc_challenge": 0,
"sciq": 0,
"piqa": 0
}
}