Muennighoff commited on
Commit
b407d09
1 Parent(s): 0252e3d
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. 4b284b84b70c4pyseed1/evaluation/rankeval/4b284b84b70c4pyseed1_0.json +87 -0
  2. 4b284b84b70c4pyseed1/evaluation/rankeval/4b284b84b70c4pyseed1_0_lm-eval_global_step80108_2023-05-13-21-25-11_0shots_backup.json +87 -0
  3. 4b284b84b70c4pyseed1/evaluation/rankeval/4b284b84b70c4pyseed1_1.json +87 -0
  4. 4b284b84b70c4pyseed1/evaluation/rankeval/4b284b84b70c4pyseed1_1_lm-eval_global_step80108_2023-05-13-21-25-11_1shots_backup.json +87 -0
  5. 4b284b84b70c4pyseed1/evaluation/rankeval/4b284b84b70c4pyseed1_2.json +87 -0
  6. 4b284b84b70c4pyseed1/evaluation/rankeval/4b284b84b70c4pyseed1_2_lm-eval_global_step80108_2023-05-13-21-25-11_2shots_backup.json +87 -0
  7. 4b284b84b70c4pyseed1/evaluation/rankeval/4b284b84b70c4pyseed1_3.json +87 -0
  8. 4b284b84b70c4pyseed1/evaluation/rankeval/4b284b84b70c4pyseed1_3_lm-eval_global_step80108_2023-05-13-21-25-11_3shots_backup.json +87 -0
  9. 4b284b84b70c4pyseed1/evaluation/rankeval/4b284b84b70c4pyseed1_4.json +87 -0
  10. 4b284b84b70c4pyseed1/evaluation/rankeval/4b284b84b70c4pyseed1_4_lm-eval_global_step80108_2023-05-13-21-25-11_4shots_backup.json +87 -0
  11. 4b284b84b70c4pyseed1/evaluation/rankeval/4b284b84b70c4pyseed1_5.json +87 -0
  12. 4b284b84b70c4pyseed1/evaluation/rankeval/4b284b84b70c4pyseed1_5_lm-eval_global_step80108_2023-05-13-21-25-11_5shots_backup.json +87 -0
  13. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  14. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_0_mp_rank_01_optim_states.pt +3 -0
  15. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_100_mp_rank_00_optim_states.pt +3 -0
  16. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_100_mp_rank_01_optim_states.pt +3 -0
  17. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_101_mp_rank_00_optim_states.pt +3 -0
  18. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_101_mp_rank_01_optim_states.pt +3 -0
  19. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_102_mp_rank_00_optim_states.pt +3 -0
  20. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_102_mp_rank_01_optim_states.pt +3 -0
  21. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_103_mp_rank_00_optim_states.pt +3 -0
  22. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_103_mp_rank_01_optim_states.pt +3 -0
  23. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_104_mp_rank_00_optim_states.pt +3 -0
  24. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_104_mp_rank_01_optim_states.pt +3 -0
  25. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_105_mp_rank_00_optim_states.pt +3 -0
  26. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_105_mp_rank_01_optim_states.pt +3 -0
  27. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_106_mp_rank_00_optim_states.pt +3 -0
  28. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_106_mp_rank_01_optim_states.pt +3 -0
  29. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_107_mp_rank_00_optim_states.pt +3 -0
  30. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_107_mp_rank_01_optim_states.pt +3 -0
  31. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_108_mp_rank_00_optim_states.pt +3 -0
  32. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_108_mp_rank_01_optim_states.pt +3 -0
  33. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_109_mp_rank_00_optim_states.pt +3 -0
  34. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_109_mp_rank_01_optim_states.pt +3 -0
  35. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3 -0
  36. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_10_mp_rank_01_optim_states.pt +3 -0
  37. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_110_mp_rank_00_optim_states.pt +3 -0
  38. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_110_mp_rank_01_optim_states.pt +3 -0
  39. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_111_mp_rank_00_optim_states.pt +3 -0
  40. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_111_mp_rank_01_optim_states.pt +3 -0
  41. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_112_mp_rank_00_optim_states.pt +3 -0
  42. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_112_mp_rank_01_optim_states.pt +3 -0
  43. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_113_mp_rank_00_optim_states.pt +3 -0
  44. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_113_mp_rank_01_optim_states.pt +3 -0
  45. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_114_mp_rank_00_optim_states.pt +3 -0
  46. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_114_mp_rank_01_optim_states.pt +3 -0
  47. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_115_mp_rank_00_optim_states.pt +3 -0
  48. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_115_mp_rank_01_optim_states.pt +3 -0
  49. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_116_mp_rank_00_optim_states.pt +3 -0
  50. 4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_116_mp_rank_01_optim_states.pt +3 -0
4b284b84b70c4pyseed1/evaluation/rankeval/4b284b84b70c4pyseed1_0.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.338,
5
+ "acc_stderr": 0.014965960710224482
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.327,
9
+ "acc_stderr": 0.014842213153411242
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3475,
13
+ "acc_stderr": 0.013751753243291852
14
+ },
15
+ "cb": {
16
+ "acc": 0.4107142857142857,
17
+ "acc_stderr": 0.0663363415035954,
18
+ "f1": 0.2126984126984127
19
+ },
20
+ "copa": {
21
+ "acc": 0.58,
22
+ "acc_stderr": 0.049604496374885836
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.3997211710814579,
26
+ "acc_stderr": 0.004888398535520509,
27
+ "acc_norm": 0.5113523202549293,
28
+ "acc_norm_stderr": 0.004988495127747286
29
+ },
30
+ "rte": {
31
+ "acc": 0.5812274368231047,
32
+ "acc_stderr": 0.029696661081234827
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5295974743488555,
36
+ "acc_stderr": 0.014027843827840088
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.6611437733832175,
40
+ "acc_stderr": 0.010945481921408547
41
+ },
42
+ "boolq": {
43
+ "acc": 0.6036697247706422,
44
+ "acc_stderr": 0.008555016706540427
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.5568181818181818,
48
+ "acc_stderr": 0.010193324837773493,
49
+ "acc_norm": 0.48274410774410775,
50
+ "acc_norm_stderr": 0.010253671674754631
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.25341296928327645,
54
+ "acc_stderr": 0.012710896778378606,
55
+ "acc_norm": 0.2815699658703072,
56
+ "acc_norm_stderr": 0.013143376735009022
57
+ },
58
+ "sciq": {
59
+ "acc": 0.76,
60
+ "acc_stderr": 0.013512312258920847,
61
+ "acc_norm": 0.669,
62
+ "acc_norm_stderr": 0.014888272588203933
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7399347116430903,
66
+ "acc_stderr": 0.0102348932490613,
67
+ "acc_norm": 0.7410228509249184,
68
+ "acc_norm_stderr": 0.010220966031405621
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b84b70c4pyseed1/evaluation/rankeval/4b284b84b70c4pyseed1_0_lm-eval_global_step80108_2023-05-13-21-25-11_0shots_backup.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.338,
5
+ "acc_stderr": 0.014965960710224482
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.327,
9
+ "acc_stderr": 0.014842213153411242
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3475,
13
+ "acc_stderr": 0.013751753243291852
14
+ },
15
+ "cb": {
16
+ "acc": 0.4107142857142857,
17
+ "acc_stderr": 0.0663363415035954,
18
+ "f1": 0.2126984126984127
19
+ },
20
+ "copa": {
21
+ "acc": 0.58,
22
+ "acc_stderr": 0.049604496374885836
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.3997211710814579,
26
+ "acc_stderr": 0.004888398535520509,
27
+ "acc_norm": 0.5113523202549293,
28
+ "acc_norm_stderr": 0.004988495127747286
29
+ },
30
+ "rte": {
31
+ "acc": 0.5812274368231047,
32
+ "acc_stderr": 0.029696661081234827
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5295974743488555,
36
+ "acc_stderr": 0.014027843827840088
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.6611437733832175,
40
+ "acc_stderr": 0.010945481921408547
41
+ },
42
+ "boolq": {
43
+ "acc": 0.6036697247706422,
44
+ "acc_stderr": 0.008555016706540427
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.5568181818181818,
48
+ "acc_stderr": 0.010193324837773493,
49
+ "acc_norm": 0.48274410774410775,
50
+ "acc_norm_stderr": 0.010253671674754631
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.25341296928327645,
54
+ "acc_stderr": 0.012710896778378606,
55
+ "acc_norm": 0.2815699658703072,
56
+ "acc_norm_stderr": 0.013143376735009022
57
+ },
58
+ "sciq": {
59
+ "acc": 0.76,
60
+ "acc_stderr": 0.013512312258920847,
61
+ "acc_norm": 0.669,
62
+ "acc_norm_stderr": 0.014888272588203933
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7399347116430903,
66
+ "acc_stderr": 0.0102348932490613,
67
+ "acc_norm": 0.7410228509249184,
68
+ "acc_norm_stderr": 0.010220966031405621
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b84b70c4pyseed1/evaluation/rankeval/4b284b84b70c4pyseed1_1.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.352,
5
+ "acc_stderr": 0.015110404505648661
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.309,
9
+ "acc_stderr": 0.014619600977206494
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.33,
13
+ "acc_stderr": 0.013579531277800922
14
+ },
15
+ "cb": {
16
+ "acc": 0.42857142857142855,
17
+ "acc_stderr": 0.06672848092813058,
18
+ "f1": 0.30199430199430194
19
+ },
20
+ "copa": {
21
+ "acc": 0.79,
22
+ "acc_stderr": 0.040936018074033256
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.43308105954989046,
26
+ "acc_stderr": 0.004944889545497953,
27
+ "acc_norm": 0.5679147580163314,
28
+ "acc_norm_stderr": 0.004943537242344414
29
+ },
30
+ "rte": {
31
+ "acc": 0.5740072202166066,
32
+ "acc_stderr": 0.02976495674177765
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5730071033938438,
36
+ "acc_stderr": 0.013901878072575058
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.6739711384286478,
40
+ "acc_stderr": 0.010839964752045182
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5587155963302752,
44
+ "acc_stderr": 0.008684548127832635
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.5879629629629629,
48
+ "acc_stderr": 0.01009976585756276,
49
+ "acc_norm": 0.5547138047138047,
50
+ "acc_norm_stderr": 0.010198171137873881
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.25426621160409557,
54
+ "acc_stderr": 0.012724999945157753,
55
+ "acc_norm": 0.28668941979522183,
56
+ "acc_norm_stderr": 0.013214986329274777
57
+ },
58
+ "sciq": {
59
+ "acc": 0.9,
60
+ "acc_stderr": 0.009491579957525023,
61
+ "acc_norm": 0.869,
62
+ "acc_norm_stderr": 0.010674874844837952
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7519042437431991,
66
+ "acc_stderr": 0.010077118315574717,
67
+ "acc_norm": 0.7519042437431991,
68
+ "acc_norm_stderr": 0.010077118315574705
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b84b70c4pyseed1/evaluation/rankeval/4b284b84b70c4pyseed1_1_lm-eval_global_step80108_2023-05-13-21-25-11_1shots_backup.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.352,
5
+ "acc_stderr": 0.015110404505648661
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.309,
9
+ "acc_stderr": 0.014619600977206494
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.33,
13
+ "acc_stderr": 0.013579531277800922
14
+ },
15
+ "cb": {
16
+ "acc": 0.42857142857142855,
17
+ "acc_stderr": 0.06672848092813058,
18
+ "f1": 0.30199430199430194
19
+ },
20
+ "copa": {
21
+ "acc": 0.79,
22
+ "acc_stderr": 0.040936018074033256
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.43308105954989046,
26
+ "acc_stderr": 0.004944889545497953,
27
+ "acc_norm": 0.5679147580163314,
28
+ "acc_norm_stderr": 0.004943537242344414
29
+ },
30
+ "rte": {
31
+ "acc": 0.5740072202166066,
32
+ "acc_stderr": 0.02976495674177765
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5730071033938438,
36
+ "acc_stderr": 0.013901878072575058
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.6739711384286478,
40
+ "acc_stderr": 0.010839964752045182
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5587155963302752,
44
+ "acc_stderr": 0.008684548127832635
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.5879629629629629,
48
+ "acc_stderr": 0.01009976585756276,
49
+ "acc_norm": 0.5547138047138047,
50
+ "acc_norm_stderr": 0.010198171137873881
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.25426621160409557,
54
+ "acc_stderr": 0.012724999945157753,
55
+ "acc_norm": 0.28668941979522183,
56
+ "acc_norm_stderr": 0.013214986329274777
57
+ },
58
+ "sciq": {
59
+ "acc": 0.9,
60
+ "acc_stderr": 0.009491579957525023,
61
+ "acc_norm": 0.869,
62
+ "acc_norm_stderr": 0.010674874844837952
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7519042437431991,
66
+ "acc_stderr": 0.010077118315574717,
67
+ "acc_norm": 0.7519042437431991,
68
+ "acc_norm_stderr": 0.010077118315574705
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b84b70c4pyseed1/evaluation/rankeval/4b284b84b70c4pyseed1_2.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.328,
5
+ "acc_stderr": 0.014853842487270334
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.332,
9
+ "acc_stderr": 0.014899597242811487
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.32416666666666666,
13
+ "acc_stderr": 0.013517438120881627
14
+ },
15
+ "cb": {
16
+ "acc": 0.35714285714285715,
17
+ "acc_stderr": 0.0646095738380922,
18
+ "f1": 0.25170068027210885
19
+ },
20
+ "copa": {
21
+ "acc": 0.73,
22
+ "acc_stderr": 0.0446196043338474
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4473212507468632,
26
+ "acc_stderr": 0.004962010338226348,
27
+ "acc_norm": 0.5805616411073491,
28
+ "acc_norm_stderr": 0.004924586362301661
29
+ },
30
+ "rte": {
31
+ "acc": 0.51985559566787,
32
+ "acc_stderr": 0.030072723167317184
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5516969218626677,
36
+ "acc_stderr": 0.013977171307126349
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.689470871191876,
40
+ "acc_stderr": 0.010700112173178448
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5660550458715596,
44
+ "acc_stderr": 0.008668405003744129
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.6165824915824916,
48
+ "acc_stderr": 0.009976995068264717,
49
+ "acc_norm": 0.5816498316498316,
50
+ "acc_norm_stderr": 0.010122061470742861
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.26535836177474403,
54
+ "acc_stderr": 0.012902554762313966,
55
+ "acc_norm": 0.31569965870307165,
56
+ "acc_norm_stderr": 0.013582571095815291
57
+ },
58
+ "sciq": {
59
+ "acc": 0.911,
60
+ "acc_stderr": 0.009008893392651528,
61
+ "acc_norm": 0.902,
62
+ "acc_norm_stderr": 0.009406619184621238
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7426550598476604,
66
+ "acc_stderr": 0.01019992106479251,
67
+ "acc_norm": 0.7519042437431991,
68
+ "acc_norm_stderr": 0.010077118315574708
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b84b70c4pyseed1/evaluation/rankeval/4b284b84b70c4pyseed1_2_lm-eval_global_step80108_2023-05-13-21-25-11_2shots_backup.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.328,
5
+ "acc_stderr": 0.014853842487270334
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.332,
9
+ "acc_stderr": 0.014899597242811487
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.32416666666666666,
13
+ "acc_stderr": 0.013517438120881627
14
+ },
15
+ "cb": {
16
+ "acc": 0.35714285714285715,
17
+ "acc_stderr": 0.0646095738380922,
18
+ "f1": 0.25170068027210885
19
+ },
20
+ "copa": {
21
+ "acc": 0.73,
22
+ "acc_stderr": 0.0446196043338474
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4473212507468632,
26
+ "acc_stderr": 0.004962010338226348,
27
+ "acc_norm": 0.5805616411073491,
28
+ "acc_norm_stderr": 0.004924586362301661
29
+ },
30
+ "rte": {
31
+ "acc": 0.51985559566787,
32
+ "acc_stderr": 0.030072723167317184
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5516969218626677,
36
+ "acc_stderr": 0.013977171307126349
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.689470871191876,
40
+ "acc_stderr": 0.010700112173178448
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5660550458715596,
44
+ "acc_stderr": 0.008668405003744129
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.6165824915824916,
48
+ "acc_stderr": 0.009976995068264717,
49
+ "acc_norm": 0.5816498316498316,
50
+ "acc_norm_stderr": 0.010122061470742861
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.26535836177474403,
54
+ "acc_stderr": 0.012902554762313966,
55
+ "acc_norm": 0.31569965870307165,
56
+ "acc_norm_stderr": 0.013582571095815291
57
+ },
58
+ "sciq": {
59
+ "acc": 0.911,
60
+ "acc_stderr": 0.009008893392651528,
61
+ "acc_norm": 0.902,
62
+ "acc_norm_stderr": 0.009406619184621238
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7426550598476604,
66
+ "acc_stderr": 0.01019992106479251,
67
+ "acc_norm": 0.7519042437431991,
68
+ "acc_norm_stderr": 0.010077118315574708
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b84b70c4pyseed1/evaluation/rankeval/4b284b84b70c4pyseed1_3.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.32,
5
+ "acc_stderr": 0.014758652303574888
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.344,
9
+ "acc_stderr": 0.015029633724408948
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3333333333333333,
13
+ "acc_stderr": 0.013613950010225608
14
+ },
15
+ "cb": {
16
+ "acc": 0.5178571428571429,
17
+ "acc_stderr": 0.06737697508644648,
18
+ "f1": 0.47008547008547
19
+ },
20
+ "copa": {
21
+ "acc": 0.72,
22
+ "acc_stderr": 0.04512608598542127
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.44991037641904,
26
+ "acc_stderr": 0.004964679845918433,
27
+ "acc_norm": 0.5893248356901015,
28
+ "acc_norm_stderr": 0.00490950953852518
29
+ },
30
+ "rte": {
31
+ "acc": 0.5090252707581228,
32
+ "acc_stderr": 0.030091559826331334
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5540647198105761,
36
+ "acc_stderr": 0.01397009348233069
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7044361304115446,
40
+ "acc_stderr": 0.010551778839373787
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5801223241590214,
44
+ "acc_stderr": 0.008632045504781744
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.6186868686868687,
48
+ "acc_stderr": 0.00996654249717102,
49
+ "acc_norm": 0.6073232323232324,
50
+ "acc_norm_stderr": 0.01002064655553869
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.27047781569965873,
54
+ "acc_stderr": 0.012980954547659554,
55
+ "acc_norm": 0.3054607508532423,
56
+ "acc_norm_stderr": 0.013460080478002501
57
+ },
58
+ "sciq": {
59
+ "acc": 0.926,
60
+ "acc_stderr": 0.008282064512704159,
61
+ "acc_norm": 0.914,
62
+ "acc_norm_stderr": 0.008870325962594766
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7524483133841132,
66
+ "acc_stderr": 0.010069703966857113,
67
+ "acc_norm": 0.7568008705114254,
68
+ "acc_norm_stderr": 0.010009611953858917
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b84b70c4pyseed1/evaluation/rankeval/4b284b84b70c4pyseed1_3_lm-eval_global_step80108_2023-05-13-21-25-11_3shots_backup.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.32,
5
+ "acc_stderr": 0.014758652303574888
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.344,
9
+ "acc_stderr": 0.015029633724408948
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3333333333333333,
13
+ "acc_stderr": 0.013613950010225608
14
+ },
15
+ "cb": {
16
+ "acc": 0.5178571428571429,
17
+ "acc_stderr": 0.06737697508644648,
18
+ "f1": 0.47008547008547
19
+ },
20
+ "copa": {
21
+ "acc": 0.72,
22
+ "acc_stderr": 0.04512608598542127
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.44991037641904,
26
+ "acc_stderr": 0.004964679845918433,
27
+ "acc_norm": 0.5893248356901015,
28
+ "acc_norm_stderr": 0.00490950953852518
29
+ },
30
+ "rte": {
31
+ "acc": 0.5090252707581228,
32
+ "acc_stderr": 0.030091559826331334
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5540647198105761,
36
+ "acc_stderr": 0.01397009348233069
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7044361304115446,
40
+ "acc_stderr": 0.010551778839373787
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5801223241590214,
44
+ "acc_stderr": 0.008632045504781744
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.6186868686868687,
48
+ "acc_stderr": 0.00996654249717102,
49
+ "acc_norm": 0.6073232323232324,
50
+ "acc_norm_stderr": 0.01002064655553869
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.27047781569965873,
54
+ "acc_stderr": 0.012980954547659554,
55
+ "acc_norm": 0.3054607508532423,
56
+ "acc_norm_stderr": 0.013460080478002501
57
+ },
58
+ "sciq": {
59
+ "acc": 0.926,
60
+ "acc_stderr": 0.008282064512704159,
61
+ "acc_norm": 0.914,
62
+ "acc_norm_stderr": 0.008870325962594766
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7524483133841132,
66
+ "acc_stderr": 0.010069703966857113,
67
+ "acc_norm": 0.7568008705114254,
68
+ "acc_norm_stderr": 0.010009611953858917
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b84b70c4pyseed1/evaluation/rankeval/4b284b84b70c4pyseed1_4.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.35,
5
+ "acc_stderr": 0.015090650341444233
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.354,
9
+ "acc_stderr": 0.015129868238451773
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.34,
13
+ "acc_stderr": 0.013680495725767792
14
+ },
15
+ "cb": {
16
+ "acc": 0.35714285714285715,
17
+ "acc_stderr": 0.06460957383809218,
18
+ "f1": 0.23931623931623933
19
+ },
20
+ "copa": {
21
+ "acc": 0.75,
22
+ "acc_stderr": 0.04351941398892446
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.45120493925512845,
26
+ "acc_stderr": 0.004965963647210312,
27
+ "acc_norm": 0.5985859390559649,
28
+ "acc_norm_stderr": 0.004891826692722828
29
+ },
30
+ "rte": {
31
+ "acc": 0.4693140794223827,
32
+ "acc_stderr": 0.03003973059219781
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5603788476716653,
36
+ "acc_stderr": 0.01394964977601569
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7108498129342598,
40
+ "acc_stderr": 0.010484068799942079
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5691131498470948,
44
+ "acc_stderr": 0.008661108320775376
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.6266835016835017,
48
+ "acc_stderr": 0.00992500914280291,
49
+ "acc_norm": 0.6043771043771043,
50
+ "acc_norm_stderr": 0.01003374139343099
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.27986348122866894,
54
+ "acc_stderr": 0.01311904089772592,
55
+ "acc_norm": 0.3191126279863481,
56
+ "acc_norm_stderr": 0.013621696119173307
57
+ },
58
+ "sciq": {
59
+ "acc": 0.932,
60
+ "acc_stderr": 0.007964887911291603,
61
+ "acc_norm": 0.915,
62
+ "acc_norm_stderr": 0.008823426366942333
63
+ },
64
+ "piqa": {
65
+ "acc": 0.750272034820457,
66
+ "acc_stderr": 0.010099232969867483,
67
+ "acc_norm": 0.7578890097932536,
68
+ "acc_norm_stderr": 0.009994371269104393
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b84b70c4pyseed1/evaluation/rankeval/4b284b84b70c4pyseed1_4_lm-eval_global_step80108_2023-05-13-21-25-11_4shots_backup.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.35,
5
+ "acc_stderr": 0.015090650341444233
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.354,
9
+ "acc_stderr": 0.015129868238451773
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.34,
13
+ "acc_stderr": 0.013680495725767792
14
+ },
15
+ "cb": {
16
+ "acc": 0.35714285714285715,
17
+ "acc_stderr": 0.06460957383809218,
18
+ "f1": 0.23931623931623933
19
+ },
20
+ "copa": {
21
+ "acc": 0.75,
22
+ "acc_stderr": 0.04351941398892446
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.45120493925512845,
26
+ "acc_stderr": 0.004965963647210312,
27
+ "acc_norm": 0.5985859390559649,
28
+ "acc_norm_stderr": 0.004891826692722828
29
+ },
30
+ "rte": {
31
+ "acc": 0.4693140794223827,
32
+ "acc_stderr": 0.03003973059219781
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5603788476716653,
36
+ "acc_stderr": 0.01394964977601569
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7108498129342598,
40
+ "acc_stderr": 0.010484068799942079
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5691131498470948,
44
+ "acc_stderr": 0.008661108320775376
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.6266835016835017,
48
+ "acc_stderr": 0.00992500914280291,
49
+ "acc_norm": 0.6043771043771043,
50
+ "acc_norm_stderr": 0.01003374139343099
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.27986348122866894,
54
+ "acc_stderr": 0.01311904089772592,
55
+ "acc_norm": 0.3191126279863481,
56
+ "acc_norm_stderr": 0.013621696119173307
57
+ },
58
+ "sciq": {
59
+ "acc": 0.932,
60
+ "acc_stderr": 0.007964887911291603,
61
+ "acc_norm": 0.915,
62
+ "acc_norm_stderr": 0.008823426366942333
63
+ },
64
+ "piqa": {
65
+ "acc": 0.750272034820457,
66
+ "acc_stderr": 0.010099232969867483,
67
+ "acc_norm": 0.7578890097932536,
68
+ "acc_norm_stderr": 0.009994371269104393
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b84b70c4pyseed1/evaluation/rankeval/4b284b84b70c4pyseed1_5.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.368,
5
+ "acc_stderr": 0.0152580735615218
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.327,
9
+ "acc_stderr": 0.01484221315341124
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3475,
13
+ "acc_stderr": 0.013751753243291854
14
+ },
15
+ "cb": {
16
+ "acc": 0.5535714285714286,
17
+ "acc_stderr": 0.06703189227942395,
18
+ "f1": 0.37948717948717947
19
+ },
20
+ "copa": {
21
+ "acc": 0.79,
22
+ "acc_stderr": 0.040936018074033256
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4523003385779725,
26
+ "acc_stderr": 0.004967023435680015,
27
+ "acc_norm": 0.6046604262099183,
28
+ "acc_norm_stderr": 0.004879242848473468
29
+ },
30
+ "rte": {
31
+ "acc": 0.5703971119133574,
32
+ "acc_stderr": 0.02979666882912467
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.579321231254933,
36
+ "acc_stderr": 0.013874526372008322
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7183324425440941,
40
+ "acc_stderr": 0.010401844358587665
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5697247706422018,
44
+ "acc_stderr": 0.008659608602932495
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.6275252525252525,
48
+ "acc_stderr": 0.009920469215736024,
49
+ "acc_norm": 0.6182659932659933,
50
+ "acc_norm_stderr": 0.009968648851839672
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.2764505119453925,
54
+ "acc_stderr": 0.013069662474252425,
55
+ "acc_norm": 0.3199658703071672,
56
+ "acc_norm_stderr": 0.013631345807016193
57
+ },
58
+ "sciq": {
59
+ "acc": 0.922,
60
+ "acc_stderr": 0.008484573530118583,
61
+ "acc_norm": 0.922,
62
+ "acc_norm_stderr": 0.00848457353011858
63
+ },
64
+ "piqa": {
65
+ "acc": 0.749183895538629,
66
+ "acc_stderr": 0.010113869547069044,
67
+ "acc_norm": 0.7584330794341676,
68
+ "acc_norm_stderr": 0.009986718001804456
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b84b70c4pyseed1/evaluation/rankeval/4b284b84b70c4pyseed1_5_lm-eval_global_step80108_2023-05-13-21-25-11_5shots_backup.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.368,
5
+ "acc_stderr": 0.0152580735615218
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.327,
9
+ "acc_stderr": 0.01484221315341124
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3475,
13
+ "acc_stderr": 0.013751753243291854
14
+ },
15
+ "cb": {
16
+ "acc": 0.5535714285714286,
17
+ "acc_stderr": 0.06703189227942395,
18
+ "f1": 0.37948717948717947
19
+ },
20
+ "copa": {
21
+ "acc": 0.79,
22
+ "acc_stderr": 0.040936018074033256
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4523003385779725,
26
+ "acc_stderr": 0.004967023435680015,
27
+ "acc_norm": 0.6046604262099183,
28
+ "acc_norm_stderr": 0.004879242848473468
29
+ },
30
+ "rte": {
31
+ "acc": 0.5703971119133574,
32
+ "acc_stderr": 0.02979666882912467
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.579321231254933,
36
+ "acc_stderr": 0.013874526372008322
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7183324425440941,
40
+ "acc_stderr": 0.010401844358587665
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5697247706422018,
44
+ "acc_stderr": 0.008659608602932495
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.6275252525252525,
48
+ "acc_stderr": 0.009920469215736024,
49
+ "acc_norm": 0.6182659932659933,
50
+ "acc_norm_stderr": 0.009968648851839672
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.2764505119453925,
54
+ "acc_stderr": 0.013069662474252425,
55
+ "acc_norm": 0.3199658703071672,
56
+ "acc_norm_stderr": 0.013631345807016193
57
+ },
58
+ "sciq": {
59
+ "acc": 0.922,
60
+ "acc_stderr": 0.008484573530118583,
61
+ "acc_norm": 0.922,
62
+ "acc_norm_stderr": 0.00848457353011858
63
+ },
64
+ "piqa": {
65
+ "acc": 0.749183895538629,
66
+ "acc_stderr": 0.010113869547069044,
67
+ "acc_norm": 0.7584330794341676,
68
+ "acc_norm_stderr": 0.009986718001804456
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3cf7f60ac6d147887abe3ff2c194e5954d6b1305d5a55c15dde827c8cc3f2b1
3
+ size 199058647
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_0_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:163e4207879a6e61824eb689742e91bbfaf8296a9a3461bb68299cef09d74609
3
+ size 199058647
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_100_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33973103548f32e6db4ef5cb27e6e894bea560cb0320cadbb6a0f85a76d8b692
3
+ size 199058733
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_100_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1528bdaae32490a4c182defc754ef7d9bec199fc2e2664d1674f26f8c5e6f8a
3
+ size 199058733
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_101_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a885a98f6c4c41025aac12e28ea9b76262379e35a0764ccaf72cc5d8247a704e
3
+ size 199058797
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_101_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca6d9b3facb81b746917e504de31eca47a54617caa1611a272abdc4f926fa07a
3
+ size 199058797
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_102_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b3d357f12bb79ac11da0dca49aefcc23594fdb460296e5ea69850780b16fd05
3
+ size 199058733
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_102_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08b8046bdabdf32ad7e6b330558ddf0f0b5fdc90b2b8b157e02de955661f2a25
3
+ size 199058733
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_103_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:badbc4e8a492e6d642d6087666797047812df9c4b2dcc7c0a2621a8c8deec718
3
+ size 199058733
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_103_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9376fb1d3ccb3af5047dc5553efe15226d0e737427d0ef887d5e671e3089035
3
+ size 199058733
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_104_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebbe47f965ea1e9ce77cd405b89234f5321c6f445b5b200d28b834776be5cac5
3
+ size 199058797
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_104_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3151c51d624001e0d5ab1a48399f0ff32de655c12ae4267248936907e78d164
3
+ size 199058797
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_105_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:493f586dc34fcecab796ca5f29f5052d19189e769ddc1a3545b5e4c14fff03a9
3
+ size 199058733
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_105_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:533e83ea671fa8bb216988ca1e936df64848022078c6e9fdfd82cbbf76dae675
3
+ size 199058733
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_106_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8b68abe147c69c00f545a33499ee85d4d7d18249c64e379e0aa4694c0de11be
3
+ size 199058669
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_106_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afb408c9a1ad5fad7c61c89e1c6c2a9203b15b057662a3426cd7f8039c73f072
3
+ size 199058669
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_107_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7256518a7c34cfe5f8838f63dcd0403950a043c37876749fb0643b93ed7f0c1c
3
+ size 199058797
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_107_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e48572b7c78c79915ae800b9153904c6facd4ddf0344173a4a2b5e938948aed
3
+ size 199058797
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_108_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:797937b99d181b5928edcdf60f3f6ee8c99f9300662eba5551923a061178976f
3
+ size 199058733
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_108_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96d7fab168a4a9de5aa78e96f40c2dffc6a6cc5f02db0fa62aa963a636955612
3
+ size 199058733
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_109_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bfcc53169f3786285f0991af9a06a908675c2b30a876969c15c8962924520dc
3
+ size 199058733
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_109_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef2a1e29fb80b21e89e6ef6aabf4e63b2408dac23eae5b70ccabc30e0de1d548
3
+ size 199058733
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5131a6ff1286ef5f29a183f35640febe71dc9b8ae36ed3c136887f85904e2597
3
+ size 199058658
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_10_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba0732cc5fa2747d7d0fccc1983284b9265017d306c68eac5d23f5b04bd5c370
3
+ size 199058658
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_110_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53b0e1d4b3a512e878efcbfc0f96091214bbc533fefbb23e99222530f8aa5495
3
+ size 199058733
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_110_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddd5fab6262fa4be171139a2c2709e9125dfb967909efe390d668d845236fc57
3
+ size 199058733
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_111_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c2055b081269529c755f2af1dfd7c2d218762f7379a39cdb07e1af8227f4f70
3
+ size 199058797
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_111_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5303b76df29cc6abb9122896c30c46b7b15cbdb2ecd57d3b5717a6bc15543b3
3
+ size 199058797
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_112_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c35935291060230a47952a39b03153fc2d8958a04b6b7556c1ba7df09125f63
3
+ size 199058605
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_112_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc7fe1851b5abbb80e9b1e4e2e3d1703981a231879e9d15b1f4976c1af5719e5
3
+ size 199058605
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_113_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0e2ef2b25cd4fb77173a9bfaefe75390057a55811c47fd84fa3d1d8152766fb
3
+ size 199058669
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_113_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:442f2956c2307fbf7c0b7919afc676bf4aad7b8e1246f57ef53918525a979603
3
+ size 199058669
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_114_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa2de280d4fb10442f733ad7e6f5ad6d50bd2ff463fd79af7775e9ceaecfb771
3
+ size 199058797
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_114_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da3f7c302a7f891a67d68ec1d47a9a159ab1fa11e68ac3a0eb359bef600ce73a
3
+ size 199058797
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_115_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df04c61188df5e7ac501b5ea09f74d358e392d1b06a4e163b5b5639b2c14c3f6
3
+ size 199058733
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_115_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35ce5259933102b56fcb8b1cc378b2a44fe71f66114936bae0b4fcebd9188cf6
3
+ size 199058733
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_116_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe4a7330004cd52c060b1f492e455ba5cdb5240d496ebd2d6b3c0cd7be141480
3
+ size 199058733
4b284b84b70c4pyseed1/global_step80108/bf16_zero_pp_rank_116_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51f0c44701eb121b1083b42913cb9a36fa8c986503b81cc4572ba3742d232c9e
3
+ size 199058733