Muennighoff committed
Commit f5f9b77
1 Parent(s): e989436
8b7178b44b/evaluation/rankeval/8b7178b44b_3.json CHANGED
@@ -54,6 +54,18 @@
       "acc_stderr": 0.013203196088537369,
       "acc_norm": 0.32081911262798635,
       "acc_norm_stderr": 0.013640943091946524
+    },
+    "sciq": {
+      "acc": 0.923,
+      "acc_stderr": 0.008434580140240651,
+      "acc_norm": 0.925,
+      "acc_norm_stderr": 0.00833333333333335
+    },
+    "piqa": {
+      "acc": 0.7442872687704026,
+      "acc_stderr": 0.010178690109459862,
+      "acc_norm": 0.7519042437431991,
+      "acc_norm_stderr": 0.010077118315574703
     }
   },
   "versions": {
@@ -68,6 +80,8 @@
     "storycloze_2016": 0,
     "boolq": 1,
     "arc_easy": 0,
-    "arc_challenge": 0
+    "arc_challenge": 0,
+    "sciq": 0,
+    "piqa": 0
   }
 }
8b7178b44b/evaluation/rankeval/8b7178b44b_3_lm-eval_global_step84877_2023-01-31-11-38-06_3shots_backup.json CHANGED
@@ -54,6 +54,18 @@
       "acc_stderr": 0.013203196088537369,
       "acc_norm": 0.32081911262798635,
       "acc_norm_stderr": 0.013640943091946524
+    },
+    "sciq": {
+      "acc": 0.923,
+      "acc_stderr": 0.008434580140240651,
+      "acc_norm": 0.925,
+      "acc_norm_stderr": 0.00833333333333335
+    },
+    "piqa": {
+      "acc": 0.7442872687704026,
+      "acc_stderr": 0.010178690109459862,
+      "acc_norm": 0.7519042437431991,
+      "acc_norm_stderr": 0.010077118315574703
     }
   },
   "versions": {
@@ -68,6 +80,8 @@
     "storycloze_2016": 0,
     "boolq": 1,
     "arc_easy": 0,
-    "arc_challenge": 0
+    "arc_challenge": 0,
+    "sciq": 0,
+    "piqa": 0
   }
 }
8b7178b44b/evaluation/rankeval/8b7178b44b_4.json CHANGED
@@ -38,6 +38,34 @@
     "storycloze_2016": {
       "acc": 0.7097808658471406,
       "acc_stderr": 0.010495529690730063
+    },
+    "boolq": {
+      "acc": 0.6241590214067279,
+      "acc_stderr": 0.008471147248160114
+    },
+    "arc_easy": {
+      "acc": 0.6401515151515151,
+      "acc_stderr": 0.009848484848484843,
+      "acc_norm": 0.6346801346801347,
+      "acc_norm_stderr": 0.009880576614806924
+    },
+    "arc_challenge": {
+      "acc": 0.28924914675767915,
+      "acc_stderr": 0.013250012579393443,
+      "acc_norm": 0.318259385665529,
+      "acc_norm_stderr": 0.013611993916971453
+    },
+    "sciq": {
+      "acc": 0.927,
+      "acc_stderr": 0.008230354715244055,
+      "acc_norm": 0.928,
+      "acc_norm_stderr": 0.008178195576218681
+    },
+    "piqa": {
+      "acc": 0.7453754080522307,
+      "acc_stderr": 0.010164432237060487,
+      "acc_norm": 0.7448313384113167,
+      "acc_norm_stderr": 0.010171571592521834
     }
   },
   "versions": {
@@ -49,6 +77,11 @@
     "hellaswag": 0,
     "rte": 0,
     "winogrande": 0,
-    "storycloze_2016": 0
+    "storycloze_2016": 0,
+    "boolq": 1,
+    "arc_easy": 0,
+    "arc_challenge": 0,
+    "sciq": 0,
+    "piqa": 0
   }
 }
8b7178b44b/evaluation/rankeval/8b7178b44b_4_lm-eval_global_step84877_2023-01-31-11-38-06_4shots_backup.json CHANGED
@@ -38,6 +38,34 @@
     "storycloze_2016": {
      "acc": 0.7097808658471406,
       "acc_stderr": 0.010495529690730063
+    },
+    "boolq": {
+      "acc": 0.6241590214067279,
+      "acc_stderr": 0.008471147248160114
+    },
+    "arc_easy": {
+      "acc": 0.6401515151515151,
+      "acc_stderr": 0.009848484848484843,
+      "acc_norm": 0.6346801346801347,
+      "acc_norm_stderr": 0.009880576614806924
+    },
+    "arc_challenge": {
+      "acc": 0.28924914675767915,
+      "acc_stderr": 0.013250012579393443,
+      "acc_norm": 0.318259385665529,
+      "acc_norm_stderr": 0.013611993916971453
+    },
+    "sciq": {
+      "acc": 0.927,
+      "acc_stderr": 0.008230354715244055,
+      "acc_norm": 0.928,
+      "acc_norm_stderr": 0.008178195576218681
+    },
+    "piqa": {
+      "acc": 0.7453754080522307,
+      "acc_stderr": 0.010164432237060487,
+      "acc_norm": 0.7448313384113167,
+      "acc_norm_stderr": 0.010171571592521834
     }
   },
   "versions": {
@@ -49,6 +77,11 @@
     "hellaswag": 0,
     "rte": 0,
     "winogrande": 0,
-    "storycloze_2016": 0
+    "storycloze_2016": 0,
+    "boolq": 1,
+    "arc_easy": 0,
+    "arc_challenge": 0,
+    "sciq": 0,
+    "piqa": 0
   }
 }
8b7178b44b/evaluation/rankeval/8b7178b44b_5.json CHANGED
@@ -34,6 +34,38 @@
     "winogrande": {
       "acc": 0.569060773480663,
       "acc_stderr": 0.01391779662333596
+    },
+    "storycloze_2016": {
+      "acc": 0.7097808658471406,
+      "acc_stderr": 0.010495529690730063
+    },
+    "boolq": {
+      "acc": 0.6223241590214067,
+      "acc_stderr": 0.008479309208281643
+    },
+    "arc_easy": {
+      "acc": 0.6456228956228957,
+      "acc_stderr": 0.00981500403025175,
+      "acc_norm": 0.6506734006734006,
+      "acc_norm_stderr": 0.0097828534493993
+    },
+    "arc_challenge": {
+      "acc": 0.29180887372013653,
+      "acc_stderr": 0.01328452529240351,
+      "acc_norm": 0.33532423208191126,
+      "acc_norm_stderr": 0.013796182947785562
+    },
+    "sciq": {
+      "acc": 0.931,
+      "acc_stderr": 0.00801893405031515,
+      "acc_norm": 0.936,
+      "acc_norm_stderr": 0.007743640226919298
+    },
+    "piqa": {
+      "acc": 0.7388465723612623,
+      "acc_stderr": 0.010248738649935581,
+      "acc_norm": 0.7459194776931447,
+      "acc_norm_stderr": 0.010157271999135055
     }
   },
   "versions": {
@@ -44,6 +76,12 @@
     "copa": 0,
     "hellaswag": 0,
     "rte": 0,
-    "winogrande": 0
+    "winogrande": 0,
+    "storycloze_2016": 0,
+    "boolq": 1,
+    "arc_easy": 0,
+    "arc_challenge": 0,
+    "sciq": 0,
+    "piqa": 0
   }
 }
8b7178b44b/evaluation/rankeval/8b7178b44b_5_lm-eval_global_step84877_2023-01-31-11-38-06_5shots_backup.json CHANGED
@@ -34,6 +34,38 @@
     "winogrande": {
       "acc": 0.569060773480663,
       "acc_stderr": 0.01391779662333596
+    },
+    "storycloze_2016": {
+      "acc": 0.7097808658471406,
+      "acc_stderr": 0.010495529690730063
+    },
+    "boolq": {
+      "acc": 0.6223241590214067,
+      "acc_stderr": 0.008479309208281643
+    },
+    "arc_easy": {
+      "acc": 0.6456228956228957,
+      "acc_stderr": 0.00981500403025175,
+      "acc_norm": 0.6506734006734006,
+      "acc_norm_stderr": 0.0097828534493993
+    },
+    "arc_challenge": {
+      "acc": 0.29180887372013653,
+      "acc_stderr": 0.01328452529240351,
+      "acc_norm": 0.33532423208191126,
+      "acc_norm_stderr": 0.013796182947785562
+    },
+    "sciq": {
+      "acc": 0.931,
+      "acc_stderr": 0.00801893405031515,
+      "acc_norm": 0.936,
+      "acc_norm_stderr": 0.007743640226919298
+    },
+    "piqa": {
+      "acc": 0.7388465723612623,
+      "acc_stderr": 0.010248738649935581,
+      "acc_norm": 0.7459194776931447,
+      "acc_norm_stderr": 0.010157271999135055
     }
   },
   "versions": {
@@ -44,6 +76,12 @@
     "copa": 0,
     "hellaswag": 0,
     "rte": 0,
-    "winogrande": 0
+    "winogrande": 0,
+    "storycloze_2016": 0,
+    "boolq": 1,
+    "arc_easy": 0,
+    "arc_challenge": 0,
+    "sciq": 0,
+    "piqa": 0
   }
 }
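For reference, a minimal sketch (not part of this commit) of how one of these rankeval result files could be loaded and summarized. It assumes the per-task metrics sit under a top-level "results" key next to "versions", as in standard lm-eval output; the file path is only an example.

import json

# Hypothetical path: any of the JSON files touched by this commit would work.
path = "8b7178b44b/evaluation/rankeval/8b7178b44b_3.json"

with open(path) as f:
    data = json.load(f)

# Assumed layout: {"results": {task: metrics}, "versions": {task: int}}.
for task, metrics in data["results"].items():
    acc = metrics["acc"]
    stderr = metrics.get("acc_stderr", 0.0)
    version = data["versions"].get(task)
    print(f"{task} (v{version}): acc = {acc:.4f} +/- {stderr:.4f}")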