Muennighoff commited on
Commit
48e5290
1 Parent(s): 31b4221
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. 4b284b42bc4seed2/evaluation/rankeval/4b284b42bc4seed2_0.json +87 -0
  2. 4b284b42bc4seed2/evaluation/rankeval/4b284b42bc4seed2_0_lm-eval_global_step80108_2023-02-15-11-04-03_0shots_backup.json +87 -0
  3. 4b284b42bc4seed2/evaluation/rankeval/4b284b42bc4seed2_1.json +87 -0
  4. 4b284b42bc4seed2/evaluation/rankeval/4b284b42bc4seed2_1_lm-eval_global_step80108_2023-02-15-11-04-03_1shots_backup.json +87 -0
  5. 4b284b42bc4seed2/evaluation/rankeval/4b284b42bc4seed2_2.json +87 -0
  6. 4b284b42bc4seed2/evaluation/rankeval/4b284b42bc4seed2_2_lm-eval_global_step80108_2023-02-15-11-04-03_2shots_backup.json +87 -0
  7. 4b284b42bc4seed2/evaluation/rankeval/4b284b42bc4seed2_3.json +87 -0
  8. 4b284b42bc4seed2/evaluation/rankeval/4b284b42bc4seed2_3_lm-eval_global_step80108_2023-02-15-11-04-03_3shots_backup.json +87 -0
  9. 4b284b42bc4seed2/evaluation/rankeval/4b284b42bc4seed2_4.json +87 -0
  10. 4b284b42bc4seed2/evaluation/rankeval/4b284b42bc4seed2_4_lm-eval_global_step80108_2023-02-15-11-04-03_4shots_backup.json +87 -0
  11. 4b284b42bc4seed2/evaluation/rankeval/4b284b42bc4seed2_5.json +87 -0
  12. 4b284b42bc4seed2/evaluation/rankeval/4b284b42bc4seed2_5_lm-eval_global_step80108_2023-02-15-11-04-03_5shots_backup.json +87 -0
  13. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  14. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_0_mp_rank_01_optim_states.pt +3 -0
  15. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_100_mp_rank_00_optim_states.pt +3 -0
  16. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_100_mp_rank_01_optim_states.pt +3 -0
  17. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_101_mp_rank_00_optim_states.pt +3 -0
  18. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_101_mp_rank_01_optim_states.pt +3 -0
  19. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_102_mp_rank_00_optim_states.pt +3 -0
  20. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_102_mp_rank_01_optim_states.pt +3 -0
  21. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_103_mp_rank_00_optim_states.pt +3 -0
  22. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_103_mp_rank_01_optim_states.pt +3 -0
  23. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_104_mp_rank_00_optim_states.pt +3 -0
  24. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_104_mp_rank_01_optim_states.pt +3 -0
  25. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_105_mp_rank_00_optim_states.pt +3 -0
  26. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_105_mp_rank_01_optim_states.pt +3 -0
  27. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_106_mp_rank_00_optim_states.pt +3 -0
  28. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_106_mp_rank_01_optim_states.pt +3 -0
  29. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_107_mp_rank_00_optim_states.pt +3 -0
  30. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_107_mp_rank_01_optim_states.pt +3 -0
  31. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_108_mp_rank_00_optim_states.pt +3 -0
  32. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_108_mp_rank_01_optim_states.pt +3 -0
  33. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_109_mp_rank_00_optim_states.pt +3 -0
  34. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_109_mp_rank_01_optim_states.pt +3 -0
  35. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3 -0
  36. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_10_mp_rank_01_optim_states.pt +3 -0
  37. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_110_mp_rank_00_optim_states.pt +3 -0
  38. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_110_mp_rank_01_optim_states.pt +3 -0
  39. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_111_mp_rank_00_optim_states.pt +3 -0
  40. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_111_mp_rank_01_optim_states.pt +3 -0
  41. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_112_mp_rank_00_optim_states.pt +3 -0
  42. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_112_mp_rank_01_optim_states.pt +3 -0
  43. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_113_mp_rank_00_optim_states.pt +3 -0
  44. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_113_mp_rank_01_optim_states.pt +3 -0
  45. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_114_mp_rank_00_optim_states.pt +3 -0
  46. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_114_mp_rank_01_optim_states.pt +3 -0
  47. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_115_mp_rank_00_optim_states.pt +3 -0
  48. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_115_mp_rank_01_optim_states.pt +3 -0
  49. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_116_mp_rank_00_optim_states.pt +3 -0
  50. 4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_116_mp_rank_01_optim_states.pt +3 -0
4b284b42bc4seed2/evaluation/rankeval/4b284b42bc4seed2_0.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.334,
5
+ "acc_stderr": 0.014922019523732963
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.336,
9
+ "acc_stderr": 0.014944140233795027
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.33666666666666667,
13
+ "acc_stderr": 0.013647602942406396
14
+ },
15
+ "cb": {
16
+ "acc": 0.39285714285714285,
17
+ "acc_stderr": 0.0658538889806635,
18
+ "f1": 0.18803418803418803
19
+ },
20
+ "copa": {
21
+ "acc": 0.76,
22
+ "acc_stderr": 0.04292346959909283
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4736108344951205,
26
+ "acc_stderr": 0.004982826916687148,
27
+ "acc_norm": 0.6192989444333798,
28
+ "acc_norm_stderr": 0.004845668799108534
29
+ },
30
+ "rte": {
31
+ "acc": 0.5306859205776173,
32
+ "acc_stderr": 0.030039730592197812
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5974743488555643,
36
+ "acc_stderr": 0.013782866831703044
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7177979690005345,
40
+ "acc_stderr": 0.010407834479647673
41
+ },
42
+ "boolq": {
43
+ "acc": 0.608868501529052,
44
+ "acc_stderr": 0.008535239054221164
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.5989057239057239,
48
+ "acc_stderr": 0.010057051106534364,
49
+ "acc_norm": 0.5290404040404041,
50
+ "acc_norm_stderr": 0.010242463826395614
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.2815699658703072,
54
+ "acc_stderr": 0.013143376735009019,
55
+ "acc_norm": 0.29948805460750855,
56
+ "acc_norm_stderr": 0.013385021637313562
57
+ },
58
+ "sciq": {
59
+ "acc": 0.829,
60
+ "acc_stderr": 0.011912216456264604,
61
+ "acc_norm": 0.746,
62
+ "acc_norm_stderr": 0.01377220656516854
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7568008705114254,
66
+ "acc_stderr": 0.01000961195385892,
67
+ "acc_norm": 0.7546245919477693,
68
+ "acc_norm_stderr": 0.010039831320422386
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b42bc4seed2/evaluation/rankeval/4b284b42bc4seed2_0_lm-eval_global_step80108_2023-02-15-11-04-03_0shots_backup.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.334,
5
+ "acc_stderr": 0.014922019523732963
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.336,
9
+ "acc_stderr": 0.014944140233795027
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.33666666666666667,
13
+ "acc_stderr": 0.013647602942406396
14
+ },
15
+ "cb": {
16
+ "acc": 0.39285714285714285,
17
+ "acc_stderr": 0.0658538889806635,
18
+ "f1": 0.18803418803418803
19
+ },
20
+ "copa": {
21
+ "acc": 0.76,
22
+ "acc_stderr": 0.04292346959909283
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4736108344951205,
26
+ "acc_stderr": 0.004982826916687148,
27
+ "acc_norm": 0.6192989444333798,
28
+ "acc_norm_stderr": 0.004845668799108534
29
+ },
30
+ "rte": {
31
+ "acc": 0.5306859205776173,
32
+ "acc_stderr": 0.030039730592197812
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5974743488555643,
36
+ "acc_stderr": 0.013782866831703044
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7177979690005345,
40
+ "acc_stderr": 0.010407834479647673
41
+ },
42
+ "boolq": {
43
+ "acc": 0.608868501529052,
44
+ "acc_stderr": 0.008535239054221164
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.5989057239057239,
48
+ "acc_stderr": 0.010057051106534364,
49
+ "acc_norm": 0.5290404040404041,
50
+ "acc_norm_stderr": 0.010242463826395614
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.2815699658703072,
54
+ "acc_stderr": 0.013143376735009019,
55
+ "acc_norm": 0.29948805460750855,
56
+ "acc_norm_stderr": 0.013385021637313562
57
+ },
58
+ "sciq": {
59
+ "acc": 0.829,
60
+ "acc_stderr": 0.011912216456264604,
61
+ "acc_norm": 0.746,
62
+ "acc_norm_stderr": 0.01377220656516854
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7568008705114254,
66
+ "acc_stderr": 0.01000961195385892,
67
+ "acc_norm": 0.7546245919477693,
68
+ "acc_norm_stderr": 0.010039831320422386
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b42bc4seed2/evaluation/rankeval/4b284b42bc4seed2_1.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.335,
5
+ "acc_stderr": 0.014933117490932575
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.326,
9
+ "acc_stderr": 0.014830507204541033
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3441666666666667,
13
+ "acc_stderr": 0.013720551062295756
14
+ },
15
+ "cb": {
16
+ "acc": 0.4107142857142857,
17
+ "acc_stderr": 0.0663363415035954,
18
+ "f1": 0.28917378917378916
19
+ },
20
+ "copa": {
21
+ "acc": 0.78,
22
+ "acc_stderr": 0.04163331998932262
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4714200358494324,
26
+ "acc_stderr": 0.00498162329219619,
27
+ "acc_norm": 0.6203943437562238,
28
+ "acc_norm_stderr": 0.004842969887794082
29
+ },
30
+ "rte": {
31
+ "acc": 0.51985559566787,
32
+ "acc_stderr": 0.030072723167317177
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5682715074980268,
36
+ "acc_stderr": 0.01392087211001071
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7049706039551042,
40
+ "acc_stderr": 0.010546232606962283
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5685015290519878,
44
+ "acc_stderr": 0.008662594569027316
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.6132154882154882,
48
+ "acc_stderr": 0.009993308355370968,
49
+ "acc_norm": 0.5774410774410774,
50
+ "acc_norm_stderr": 0.010135978222981071
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.2713310580204778,
54
+ "acc_stderr": 0.012993807727545794,
55
+ "acc_norm": 0.302901023890785,
56
+ "acc_norm_stderr": 0.013428241573185349
57
+ },
58
+ "sciq": {
59
+ "acc": 0.868,
60
+ "acc_stderr": 0.010709373963528012,
61
+ "acc_norm": 0.841,
62
+ "acc_norm_stderr": 0.0115694793682713
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7464635473340587,
66
+ "acc_stderr": 0.010150090834551794,
67
+ "acc_norm": 0.749183895538629,
68
+ "acc_norm_stderr": 0.010113869547069046
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b42bc4seed2/evaluation/rankeval/4b284b42bc4seed2_1_lm-eval_global_step80108_2023-02-15-11-04-03_1shots_backup.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.335,
5
+ "acc_stderr": 0.014933117490932575
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.326,
9
+ "acc_stderr": 0.014830507204541033
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3441666666666667,
13
+ "acc_stderr": 0.013720551062295756
14
+ },
15
+ "cb": {
16
+ "acc": 0.4107142857142857,
17
+ "acc_stderr": 0.0663363415035954,
18
+ "f1": 0.28917378917378916
19
+ },
20
+ "copa": {
21
+ "acc": 0.78,
22
+ "acc_stderr": 0.04163331998932262
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4714200358494324,
26
+ "acc_stderr": 0.00498162329219619,
27
+ "acc_norm": 0.6203943437562238,
28
+ "acc_norm_stderr": 0.004842969887794082
29
+ },
30
+ "rte": {
31
+ "acc": 0.51985559566787,
32
+ "acc_stderr": 0.030072723167317177
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5682715074980268,
36
+ "acc_stderr": 0.01392087211001071
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7049706039551042,
40
+ "acc_stderr": 0.010546232606962283
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5685015290519878,
44
+ "acc_stderr": 0.008662594569027316
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.6132154882154882,
48
+ "acc_stderr": 0.009993308355370968,
49
+ "acc_norm": 0.5774410774410774,
50
+ "acc_norm_stderr": 0.010135978222981071
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.2713310580204778,
54
+ "acc_stderr": 0.012993807727545794,
55
+ "acc_norm": 0.302901023890785,
56
+ "acc_norm_stderr": 0.013428241573185349
57
+ },
58
+ "sciq": {
59
+ "acc": 0.868,
60
+ "acc_stderr": 0.010709373963528012,
61
+ "acc_norm": 0.841,
62
+ "acc_norm_stderr": 0.0115694793682713
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7464635473340587,
66
+ "acc_stderr": 0.010150090834551794,
67
+ "acc_norm": 0.749183895538629,
68
+ "acc_norm_stderr": 0.010113869547069046
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b42bc4seed2/evaluation/rankeval/4b284b42bc4seed2_2.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.331,
5
+ "acc_stderr": 0.014888272588203945
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.35,
9
+ "acc_stderr": 0.015090650341444231
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.33416666666666667,
13
+ "acc_stderr": 0.013622434813136769
14
+ },
15
+ "cb": {
16
+ "acc": 0.375,
17
+ "acc_stderr": 0.06527912098338669,
18
+ "f1": 0.26182156999767064
19
+ },
20
+ "copa": {
21
+ "acc": 0.75,
22
+ "acc_stderr": 0.04351941398892446
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.468034256124278,
26
+ "acc_stderr": 0.0049795737655758555,
27
+ "acc_norm": 0.6188010356502689,
28
+ "acc_norm_stderr": 0.00484688692976345
29
+ },
30
+ "rte": {
31
+ "acc": 0.5342960288808665,
32
+ "acc_stderr": 0.03002557981936643
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5769534333070244,
36
+ "acc_stderr": 0.013885055359056472
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7151256012827365,
40
+ "acc_stderr": 0.01043751398661172
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5519877675840978,
44
+ "acc_stderr": 0.008697655510897228
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.6102693602693603,
48
+ "acc_stderr": 0.01000716939179705,
49
+ "acc_norm": 0.5993265993265994,
50
+ "acc_norm_stderr": 0.010055304474255582
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.28924914675767915,
54
+ "acc_stderr": 0.013250012579393443,
55
+ "acc_norm": 0.30887372013651876,
56
+ "acc_norm_stderr": 0.013501770929344003
57
+ },
58
+ "sciq": {
59
+ "acc": 0.883,
60
+ "acc_stderr": 0.010169287802713329,
61
+ "acc_norm": 0.865,
62
+ "acc_norm_stderr": 0.010811655372416053
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7529923830250272,
66
+ "acc_stderr": 0.010062268140772622,
67
+ "acc_norm": 0.7584330794341676,
68
+ "acc_norm_stderr": 0.00998671800180446
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b42bc4seed2/evaluation/rankeval/4b284b42bc4seed2_2_lm-eval_global_step80108_2023-02-15-11-04-03_2shots_backup.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.331,
5
+ "acc_stderr": 0.014888272588203945
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.35,
9
+ "acc_stderr": 0.015090650341444231
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.33416666666666667,
13
+ "acc_stderr": 0.013622434813136769
14
+ },
15
+ "cb": {
16
+ "acc": 0.375,
17
+ "acc_stderr": 0.06527912098338669,
18
+ "f1": 0.26182156999767064
19
+ },
20
+ "copa": {
21
+ "acc": 0.75,
22
+ "acc_stderr": 0.04351941398892446
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.468034256124278,
26
+ "acc_stderr": 0.0049795737655758555,
27
+ "acc_norm": 0.6188010356502689,
28
+ "acc_norm_stderr": 0.00484688692976345
29
+ },
30
+ "rte": {
31
+ "acc": 0.5342960288808665,
32
+ "acc_stderr": 0.03002557981936643
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5769534333070244,
36
+ "acc_stderr": 0.013885055359056472
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7151256012827365,
40
+ "acc_stderr": 0.01043751398661172
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5519877675840978,
44
+ "acc_stderr": 0.008697655510897228
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.6102693602693603,
48
+ "acc_stderr": 0.01000716939179705,
49
+ "acc_norm": 0.5993265993265994,
50
+ "acc_norm_stderr": 0.010055304474255582
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.28924914675767915,
54
+ "acc_stderr": 0.013250012579393443,
55
+ "acc_norm": 0.30887372013651876,
56
+ "acc_norm_stderr": 0.013501770929344003
57
+ },
58
+ "sciq": {
59
+ "acc": 0.883,
60
+ "acc_stderr": 0.010169287802713329,
61
+ "acc_norm": 0.865,
62
+ "acc_norm_stderr": 0.010811655372416053
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7529923830250272,
66
+ "acc_stderr": 0.010062268140772622,
67
+ "acc_norm": 0.7584330794341676,
68
+ "acc_norm_stderr": 0.00998671800180446
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b42bc4seed2/evaluation/rankeval/4b284b42bc4seed2_3.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.321,
5
+ "acc_stderr": 0.014770821817934645
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.332,
9
+ "acc_stderr": 0.014899597242811482
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3425,
13
+ "acc_stderr": 0.013704669762934728
14
+ },
15
+ "cb": {
16
+ "acc": 0.39285714285714285,
17
+ "acc_stderr": 0.0658538889806635,
18
+ "f1": 0.2736842105263158
19
+ },
20
+ "copa": {
21
+ "acc": 0.76,
22
+ "acc_stderr": 0.04292346959909283
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.46873132842063336,
26
+ "acc_stderr": 0.004980014536539821,
27
+ "acc_norm": 0.6212905795658236,
28
+ "acc_norm_stderr": 0.0048407422067181065
29
+ },
30
+ "rte": {
31
+ "acc": 0.5234657039711191,
32
+ "acc_stderr": 0.03006330041190266
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5895816890292028,
36
+ "acc_stderr": 0.013825107120035866
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7156600748262961,
40
+ "acc_stderr": 0.010431614128665253
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5532110091743119,
44
+ "acc_stderr": 0.008695392261996192
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.6178451178451179,
48
+ "acc_stderr": 0.009970747281292436,
49
+ "acc_norm": 0.601010101010101,
50
+ "acc_norm_stderr": 0.010048240683798748
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.27474402730375425,
54
+ "acc_stderr": 0.013044617212771227,
55
+ "acc_norm": 0.3037542662116041,
56
+ "acc_norm_stderr": 0.013438909184778757
57
+ },
58
+ "sciq": {
59
+ "acc": 0.887,
60
+ "acc_stderr": 0.010016552866696846,
61
+ "acc_norm": 0.876,
62
+ "acc_norm_stderr": 0.010427498872343973
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7557127312295974,
66
+ "acc_stderr": 0.010024765172284242,
67
+ "acc_norm": 0.7600652883569097,
68
+ "acc_norm_stderr": 0.009963625892809545
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b42bc4seed2/evaluation/rankeval/4b284b42bc4seed2_3_lm-eval_global_step80108_2023-02-15-11-04-03_3shots_backup.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.321,
5
+ "acc_stderr": 0.014770821817934645
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.332,
9
+ "acc_stderr": 0.014899597242811482
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3425,
13
+ "acc_stderr": 0.013704669762934728
14
+ },
15
+ "cb": {
16
+ "acc": 0.39285714285714285,
17
+ "acc_stderr": 0.0658538889806635,
18
+ "f1": 0.2736842105263158
19
+ },
20
+ "copa": {
21
+ "acc": 0.76,
22
+ "acc_stderr": 0.04292346959909283
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.46873132842063336,
26
+ "acc_stderr": 0.004980014536539821,
27
+ "acc_norm": 0.6212905795658236,
28
+ "acc_norm_stderr": 0.0048407422067181065
29
+ },
30
+ "rte": {
31
+ "acc": 0.5234657039711191,
32
+ "acc_stderr": 0.03006330041190266
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5895816890292028,
36
+ "acc_stderr": 0.013825107120035866
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7156600748262961,
40
+ "acc_stderr": 0.010431614128665253
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5532110091743119,
44
+ "acc_stderr": 0.008695392261996192
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.6178451178451179,
48
+ "acc_stderr": 0.009970747281292436,
49
+ "acc_norm": 0.601010101010101,
50
+ "acc_norm_stderr": 0.010048240683798748
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.27474402730375425,
54
+ "acc_stderr": 0.013044617212771227,
55
+ "acc_norm": 0.3037542662116041,
56
+ "acc_norm_stderr": 0.013438909184778757
57
+ },
58
+ "sciq": {
59
+ "acc": 0.887,
60
+ "acc_stderr": 0.010016552866696846,
61
+ "acc_norm": 0.876,
62
+ "acc_norm_stderr": 0.010427498872343973
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7557127312295974,
66
+ "acc_stderr": 0.010024765172284242,
67
+ "acc_norm": 0.7600652883569097,
68
+ "acc_norm_stderr": 0.009963625892809545
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b42bc4seed2/evaluation/rankeval/4b284b42bc4seed2_4.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.324,
5
+ "acc_stderr": 0.014806864733738856
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.359,
9
+ "acc_stderr": 0.015177264224798596
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3466666666666667,
13
+ "acc_stderr": 0.013744022550571952
14
+ },
15
+ "cb": {
16
+ "acc": 0.3392857142857143,
17
+ "acc_stderr": 0.06384226561930827,
18
+ "f1": 0.2379084967320261
19
+ },
20
+ "copa": {
21
+ "acc": 0.77,
22
+ "acc_stderr": 0.04229525846816506
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.46793467436765585,
26
+ "acc_stderr": 0.004979510001776621,
27
+ "acc_norm": 0.6213901613224457,
28
+ "acc_norm_stderr": 0.004840493603166203
29
+ },
30
+ "rte": {
31
+ "acc": 0.49458483754512633,
32
+ "acc_stderr": 0.030094698123239966
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5816890292028414,
36
+ "acc_stderr": 0.013863669961195911
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7226082308925709,
40
+ "acc_stderr": 0.010353267472010767
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5278287461773701,
44
+ "acc_stderr": 0.008731499445069549
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.6195286195286195,
48
+ "acc_stderr": 0.009962305992058577,
49
+ "acc_norm": 0.6052188552188552,
50
+ "acc_norm_stderr": 0.010030038935883598
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.27474402730375425,
54
+ "acc_stderr": 0.013044617212771227,
55
+ "acc_norm": 0.30204778156996587,
56
+ "acc_norm_stderr": 0.01341751914471642
57
+ },
58
+ "sciq": {
59
+ "acc": 0.892,
60
+ "acc_stderr": 0.0098200016513457,
61
+ "acc_norm": 0.889,
62
+ "acc_norm_stderr": 0.009938701010583726
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7568008705114254,
66
+ "acc_stderr": 0.010009611953858922,
67
+ "acc_norm": 0.7671381936887922,
68
+ "acc_norm_stderr": 0.009861236071080757
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b42bc4seed2/evaluation/rankeval/4b284b42bc4seed2_4_lm-eval_global_step80108_2023-02-15-11-04-03_4shots_backup.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.324,
5
+ "acc_stderr": 0.014806864733738856
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.359,
9
+ "acc_stderr": 0.015177264224798596
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3466666666666667,
13
+ "acc_stderr": 0.013744022550571952
14
+ },
15
+ "cb": {
16
+ "acc": 0.3392857142857143,
17
+ "acc_stderr": 0.06384226561930827,
18
+ "f1": 0.2379084967320261
19
+ },
20
+ "copa": {
21
+ "acc": 0.77,
22
+ "acc_stderr": 0.04229525846816506
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.46793467436765585,
26
+ "acc_stderr": 0.004979510001776621,
27
+ "acc_norm": 0.6213901613224457,
28
+ "acc_norm_stderr": 0.004840493603166203
29
+ },
30
+ "rte": {
31
+ "acc": 0.49458483754512633,
32
+ "acc_stderr": 0.030094698123239966
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5816890292028414,
36
+ "acc_stderr": 0.013863669961195911
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7226082308925709,
40
+ "acc_stderr": 0.010353267472010767
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5278287461773701,
44
+ "acc_stderr": 0.008731499445069549
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.6195286195286195,
48
+ "acc_stderr": 0.009962305992058577,
49
+ "acc_norm": 0.6052188552188552,
50
+ "acc_norm_stderr": 0.010030038935883598
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.27474402730375425,
54
+ "acc_stderr": 0.013044617212771227,
55
+ "acc_norm": 0.30204778156996587,
56
+ "acc_norm_stderr": 0.01341751914471642
57
+ },
58
+ "sciq": {
59
+ "acc": 0.892,
60
+ "acc_stderr": 0.0098200016513457,
61
+ "acc_norm": 0.889,
62
+ "acc_norm_stderr": 0.009938701010583726
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7568008705114254,
66
+ "acc_stderr": 0.010009611953858922,
67
+ "acc_norm": 0.7671381936887922,
68
+ "acc_norm_stderr": 0.009861236071080757
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b42bc4seed2/evaluation/rankeval/4b284b42bc4seed2_5.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.339,
5
+ "acc_stderr": 0.014976758771620347
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.322,
9
+ "acc_stderr": 0.014782913600996664
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.35333333333333333,
13
+ "acc_stderr": 0.013804572162314925
14
+ },
15
+ "cb": {
16
+ "acc": 0.375,
17
+ "acc_stderr": 0.06527912098338669,
18
+ "f1": 0.25089094796863864
19
+ },
20
+ "copa": {
21
+ "acc": 0.76,
22
+ "acc_stderr": 0.04292346959909283
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4671380203146783,
26
+ "acc_stderr": 0.004978992721242829,
27
+ "acc_norm": 0.6250746863174667,
28
+ "acc_norm_stderr": 0.004831142570475509
29
+ },
30
+ "rte": {
31
+ "acc": 0.5018050541516246,
32
+ "acc_stderr": 0.030096267148976626
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5832675611681136,
36
+ "acc_stderr": 0.01385625007279632
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7252805986103688,
40
+ "acc_stderr": 0.010322309878339502
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5290519877675841,
44
+ "acc_stderr": 0.00873028052845153
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.6195286195286195,
48
+ "acc_stderr": 0.00996230599205857,
49
+ "acc_norm": 0.6136363636363636,
50
+ "acc_norm_stderr": 0.009991296778159615
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.28498293515358364,
54
+ "acc_stderr": 0.013191348179838793,
55
+ "acc_norm": 0.310580204778157,
56
+ "acc_norm_stderr": 0.01352229209805305
57
+ },
58
+ "sciq": {
59
+ "acc": 0.906,
60
+ "acc_stderr": 0.009233052000787736,
61
+ "acc_norm": 0.894,
62
+ "acc_norm_stderr": 0.009739551265785133
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7453754080522307,
66
+ "acc_stderr": 0.01016443223706049,
67
+ "acc_norm": 0.7595212187159956,
68
+ "acc_norm_stderr": 0.009971345364651066
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b42bc4seed2/evaluation/rankeval/4b284b42bc4seed2_5_lm-eval_global_step80108_2023-02-15-11-04-03_5shots_backup.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.339,
5
+ "acc_stderr": 0.014976758771620347
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.322,
9
+ "acc_stderr": 0.014782913600996664
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.35333333333333333,
13
+ "acc_stderr": 0.013804572162314925
14
+ },
15
+ "cb": {
16
+ "acc": 0.375,
17
+ "acc_stderr": 0.06527912098338669,
18
+ "f1": 0.25089094796863864
19
+ },
20
+ "copa": {
21
+ "acc": 0.76,
22
+ "acc_stderr": 0.04292346959909283
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4671380203146783,
26
+ "acc_stderr": 0.004978992721242829,
27
+ "acc_norm": 0.6250746863174667,
28
+ "acc_norm_stderr": 0.004831142570475509
29
+ },
30
+ "rte": {
31
+ "acc": 0.5018050541516246,
32
+ "acc_stderr": 0.030096267148976626
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5832675611681136,
36
+ "acc_stderr": 0.01385625007279632
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7252805986103688,
40
+ "acc_stderr": 0.010322309878339502
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5290519877675841,
44
+ "acc_stderr": 0.00873028052845153
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.6195286195286195,
48
+ "acc_stderr": 0.00996230599205857,
49
+ "acc_norm": 0.6136363636363636,
50
+ "acc_norm_stderr": 0.009991296778159615
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.28498293515358364,
54
+ "acc_stderr": 0.013191348179838793,
55
+ "acc_norm": 0.310580204778157,
56
+ "acc_norm_stderr": 0.01352229209805305
57
+ },
58
+ "sciq": {
59
+ "acc": 0.906,
60
+ "acc_stderr": 0.009233052000787736,
61
+ "acc_norm": 0.894,
62
+ "acc_norm_stderr": 0.009739551265785133
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7453754080522307,
66
+ "acc_stderr": 0.01016443223706049,
67
+ "acc_norm": 0.7595212187159956,
68
+ "acc_norm_stderr": 0.009971345364651066
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a2078697be41e489201c494a24f66005357b6c3c6557a5dc846042d6e696f8c
3
+ size 199058647
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_0_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2982ed0ba1bb80db84d5a3f72c3ffdc4b0c526fc4c9257461c2a554c9351d2f9
3
+ size 199058647
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_100_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dbcf8190caaa0ef7f63673815608a3a9f8735ba98ddeacc52fab07f9434fbe8
3
+ size 199058733
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_100_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b573dc398e77e4b3cd44dea7707e31674b23b24e49053c88498c1e4711c9cc6d
3
+ size 199058733
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_101_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bfa77ade3af529371640597c67ada8f15a22372330cf841ccb39207a6fd4668
3
+ size 199058797
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_101_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b6a5e827199fac6a765f39fdbfe12e8e206a811e5b3396067a5833f41f916a1
3
+ size 199058797
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_102_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b3f500a0680a8cb28552e7b8a8f487ee42e00dafe80541ebeebe73e2fc49913
3
+ size 199058733
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_102_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be0d79d669f7787e686fe1a9ea19b54fa7a7b28e384618de44be35ad7e1cf7b4
3
+ size 199058733
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_103_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f6edaa0991c578f789a316b104a67d43c889c37f19c84fef0d9f9beab15c2b7
3
+ size 199058733
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_103_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7050cf08bbfe167073277eedf3f31f942dffc89e834cbba39fd3e20219251366
3
+ size 199058733
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_104_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b27eddd0eeba8c36a040a8893f7556be00f4c7831705ec513c067927c9645a0b
3
+ size 199058797
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_104_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71b99b0bd71e768d6d40af6e9f33da4a85019b6384d7973872aeb9287164fe38
3
+ size 199058797
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_105_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:013b8762a7d7135b1be119d2716b0629179ee8510e7276fd1d239c63dc000ea8
3
+ size 199058733
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_105_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ea645a024ea70efa85a27f864bd2743699aceecfc3623b9b06fda954a6f90ce
3
+ size 199058733
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_106_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f73a1603980521721a431e6d5cc15fa307bad446cb51a6ee3ee36cbda32d9fc
3
+ size 199058669
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_106_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:812f7ecbe125c0938efd96030678e9b5da1358dbb7ab38fedf1662949cd81d47
3
+ size 199058669
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_107_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13621e5ccc300afeaa0048ec9b8074616ce7264c42ecd9463d7f21fe82141204
3
+ size 199058797
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_107_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38a5deb60fb8690c8ffaee8263d7091d51ec04696e70beddcd0a6f6f6fe7c78a
3
+ size 199058797
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_108_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05938a115e592330627f8a8e6d44c1e3b61993e0a13d7aef4deee59f81a86e7d
3
+ size 199058733
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_108_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4e69cb1bd6b51bfe74dc249aec10450c8cebc29a62b0934979f0ea7cf579149
3
+ size 199058733
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_109_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21c17003da2fc7105cf1c808df5fcbf3458caa32188b8da2306b0e33e2dfcd2a
3
+ size 199058733
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_109_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3006911668642207b2a5599e979695511fc7f029d9adcfab58adae71b4dc5791
3
+ size 199058733
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69cf9b1d62afac17540ceab8bb3c6744d73f4ecea7d83742563a37c6f9af95ab
3
+ size 199058658
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_10_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71e517fc30a037fcdf37dbaf0cb7c20b06e24dbb52284c45eba68f6789baa546
3
+ size 199058658
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_110_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0820c337865dce2ba68bc7d5e2f89a2419b5eacdf77d1f77bffe72c7806950e5
3
+ size 199058733
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_110_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2003919b0e53b875bec03276d94a2b4869f0aed8703166e948916519ec562c44
3
+ size 199058733
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_111_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:036e00918115921a44cdd2fc931b406ba589a726075912480268323ac93f13d6
3
+ size 199058797
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_111_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66a9aa46ac0961b07cf2344a59b0d30d3da1dd0771db519ff76a5940051874e5
3
+ size 199058797
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_112_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4c6faef14846612b0078e10feab9dfa176a92001000214b27c9accf084c38da
3
+ size 199058605
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_112_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:745218f2ebb8f8368e491342814b081a8fa1be6373da54608db95e5865e9f2a0
3
+ size 199058605
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_113_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a644d61352ffcd206a064bd58db57efbb281a079332d6c4fbb209c3e23946b02
3
+ size 199058669
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_113_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:361bef7d6a7ae36b88e493be7080d18a9f0a5d9c2814b41f7aa283bb5e19ba07
3
+ size 199058669
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_114_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff56df5f0c784f7405fd4627b07cb487572e35d3df3cbda600ffd7cb16781f8
3
+ size 199058797
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_114_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c71aa266a36b6bc1c0b1ca4963cdf3b5b5adbfd7c42e3823b36084c66df6e9b0
3
+ size 199058797
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_115_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f410231a0cba26a62a7459943e3c28d388416d41a46e480732cfe4bf3a9d38bf
3
+ size 199058733
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_115_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64a0fe41a45c0e8852ce64891fbb8c43918b0f6e4a066c438ddbbabb30acb465
3
+ size 199058733
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_116_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09582dbbfbbf3acc9a705df305f7a63eb3aebb3e012e79ab358090e9fc61e535
3
+ size 199058733
4b284b42bc4seed2/global_step80108/bf16_zero_pp_rank_116_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4fd032ae92240b9fa53519b54c19e406cbcb9412dd8a0266e690382fd35ee76
3
+ size 199058733