Muennighoff commited on
Commit
cf524af
1 Parent(s): 08dd229
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +24 -0
  2. 4b284b12bc4opt4/3523361.err +0 -0
  3. 4b284b12bc4opt4/3523361.out +0 -0
  4. 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-web_nlg_en_PALM_prompt_0.jsonl +0 -0
  5. 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-web_nlg_en_PALM_prompt_1.jsonl +0 -0
  6. 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-web_nlg_en_PALM_prompt_2.jsonl +0 -0
  7. 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-web_nlg_en_PALM_prompt_3.jsonl +0 -0
  8. 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-web_nlg_en_PALM_prompt_4.jsonl +0 -0
  9. 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-web_nlg_en_PALM_prompt_5.jsonl +0 -0
  10. 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-wiki_lingua_en_tldr_en_0.jsonl +0 -0
  11. 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-wiki_lingua_en_tldr_en_1.jsonl +0 -0
  12. 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-wiki_lingua_en_tldr_en_2.jsonl +0 -0
  13. 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-wiki_lingua_en_tldr_en_3.jsonl +0 -0
  14. 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-wiki_lingua_en_tldr_en_4.jsonl +0 -0
  15. 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-wiki_lingua_en_tldr_en_5.jsonl +0 -0
  16. 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl +0 -0
  17. 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl +0 -0
  18. 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl +0 -0
  19. 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl +0 -0
  20. 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl +0 -0
  21. 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl +0 -0
  22. 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_gem_xsum_article_DOC_summary_0.jsonl +0 -0
  23. 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_gem_xsum_article_DOC_summary_1.jsonl +0 -0
  24. 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_gem_xsum_article_DOC_summary_2.jsonl +0 -0
  25. 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_gem_xsum_article_DOC_summary_3.jsonl +0 -0
  26. 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_gem_xsum_article_DOC_summary_4.jsonl +0 -0
  27. 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_gem_xsum_article_DOC_summary_5.jsonl +0 -0
  28. 4b284b12bc4opt4/evaluation/rankeval/4b284b12bc4opt4_0.json +87 -0
  29. 4b284b12bc4opt4/evaluation/rankeval/4b284b12bc4opt4_0_lm-eval_global_step109672_2023-05-15-10-12-47_0shots_backup.json +87 -0
  30. 4b284b12bc4opt4/evaluation/rankeval/4b284b12bc4opt4_1.json +87 -0
  31. 4b284b12bc4opt4/evaluation/rankeval/4b284b12bc4opt4_1_lm-eval_global_step109672_2023-05-15-10-12-47_1shots_backup.json +87 -0
  32. 4b284b12bc4opt4/evaluation/rankeval/4b284b12bc4opt4_2.json +87 -0
  33. 4b284b12bc4opt4/evaluation/rankeval/4b284b12bc4opt4_2_lm-eval_global_step109672_2023-05-15-10-12-47_2shots_backup.json +87 -0
  34. 4b284b12bc4opt4/evaluation/rankeval/4b284b12bc4opt4_3.json +73 -0
  35. 4b284b12bc4opt4/evaluation/rankeval/4b284b12bc4opt4_3_lm-eval_global_step109672_2023-05-15-10-12-47_3shots_backup.json +73 -0
  36. 4b284b12bc4opt4/evaluation/rankeval/4b284b12bc4opt4_4.json +59 -0
  37. 4b284b12bc4opt4/evaluation/rankeval/4b284b12bc4opt4_4_lm-eval_global_step109672_2023-05-15-10-12-47_4shots_backup.json +59 -0
  38. 4b284b12bc4opt4/evaluation/rankeval/4b284b12bc4opt4_5.json +54 -0
  39. 4b284b12bc4opt4/evaluation/rankeval/4b284b12bc4opt4_5_lm-eval_global_step109672_2023-05-15-10-12-47_5shots_backup.json +54 -0
  40. 4b284b12bc4opt4/global_step109672/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  41. 4b284b12bc4opt4/global_step109672/bf16_zero_pp_rank_100_mp_rank_00_optim_states.pt +3 -0
  42. 4b284b12bc4opt4/global_step109672/bf16_zero_pp_rank_101_mp_rank_00_optim_states.pt +3 -0
  43. 4b284b12bc4opt4/global_step109672/bf16_zero_pp_rank_102_mp_rank_00_optim_states.pt +3 -0
  44. 4b284b12bc4opt4/global_step109672/bf16_zero_pp_rank_103_mp_rank_00_optim_states.pt +3 -0
  45. 4b284b12bc4opt4/global_step109672/bf16_zero_pp_rank_104_mp_rank_00_optim_states.pt +3 -0
  46. 4b284b12bc4opt4/global_step109672/bf16_zero_pp_rank_105_mp_rank_00_optim_states.pt +3 -0
  47. 4b284b12bc4opt4/global_step109672/bf16_zero_pp_rank_106_mp_rank_00_optim_states.pt +3 -0
  48. 4b284b12bc4opt4/global_step109672/bf16_zero_pp_rank_107_mp_rank_00_optim_states.pt +3 -0
  49. 4b284b12bc4opt4/global_step109672/bf16_zero_pp_rank_108_mp_rank_00_optim_states.pt +3 -0
  50. 4b284b12bc4opt4/global_step109672/bf16_zero_pp_rank_109_mp_rank_00_optim_states.pt +3 -0
.gitattributes CHANGED
@@ -3164,3 +3164,27 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
3164
  4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
3165
  4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
3166
  4b284b6bc4/evaluation/generation/examples.4b284b6bc4_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3164
  4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
3165
  4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
3166
  4b284b6bc4/evaluation/generation/examples.4b284b6bc4_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
3167
+ 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
3168
+ 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
3169
+ 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
3170
+ 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
3171
+ 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
3172
+ 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
3173
+ 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
3174
+ 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
3175
+ 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
3176
+ 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
3177
+ 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
3178
+ 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
3179
+ 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
3180
+ 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
3181
+ 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
3182
+ 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
3183
+ 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
3184
+ 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
3185
+ 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
3186
+ 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
3187
+ 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
3188
+ 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
3189
+ 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
3190
+ 4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
4b284b12bc4opt4/3523361.err ADDED
The diff for this file is too large to render. See raw diff
 
4b284b12bc4opt4/3523361.out ADDED
The diff for this file is too large to render. See raw diff
 
4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-web_nlg_en_PALM_prompt_0.jsonl ADDED
File without changes
4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-web_nlg_en_PALM_prompt_1.jsonl ADDED
File without changes
4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-web_nlg_en_PALM_prompt_2.jsonl ADDED
File without changes
4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-web_nlg_en_PALM_prompt_3.jsonl ADDED
File without changes
4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-web_nlg_en_PALM_prompt_4.jsonl ADDED
File without changes
4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-web_nlg_en_PALM_prompt_5.jsonl ADDED
File without changes
4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-wiki_lingua_en_tldr_en_0.jsonl ADDED
File without changes
4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-wiki_lingua_en_tldr_en_1.jsonl ADDED
File without changes
4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-wiki_lingua_en_tldr_en_2.jsonl ADDED
File without changes
4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-wiki_lingua_en_tldr_en_3.jsonl ADDED
File without changes
4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-wiki_lingua_en_tldr_en_4.jsonl ADDED
File without changes
4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_GEM-wiki_lingua_en_tldr_en_5.jsonl ADDED
File without changes
4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl ADDED
File without changes
4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl ADDED
File without changes
4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl ADDED
File without changes
4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl ADDED
File without changes
4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl ADDED
File without changes
4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl ADDED
File without changes
4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_gem_xsum_article_DOC_summary_0.jsonl ADDED
File without changes
4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_gem_xsum_article_DOC_summary_1.jsonl ADDED
File without changes
4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_gem_xsum_article_DOC_summary_2.jsonl ADDED
File without changes
4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_gem_xsum_article_DOC_summary_3.jsonl ADDED
File without changes
4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_gem_xsum_article_DOC_summary_4.jsonl ADDED
File without changes
4b284b12bc4opt4/evaluation/generation/examples.4b284b12bc4opt4_gem_xsum_article_DOC_summary_5.jsonl ADDED
File without changes
4b284b12bc4opt4/evaluation/rankeval/4b284b12bc4opt4_0.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.345,
5
+ "acc_stderr": 0.015039986742055237
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.338,
9
+ "acc_stderr": 0.01496596071022449
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3441666666666667,
13
+ "acc_stderr": 0.013720551062295756
14
+ },
15
+ "cb": {
16
+ "acc": 0.375,
17
+ "acc_stderr": 0.06527912098338669,
18
+ "f1": 0.1986111111111111
19
+ },
20
+ "copa": {
21
+ "acc": 0.8,
22
+ "acc_stderr": 0.040201512610368445
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4736108344951205,
26
+ "acc_stderr": 0.004982826916687145,
27
+ "acc_norm": 0.6177056363274248,
28
+ "acc_norm_stderr": 0.0048495478191344825
29
+ },
30
+ "rte": {
31
+ "acc": 0.5523465703971119,
32
+ "acc_stderr": 0.02993107036293953
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5872138910812944,
36
+ "acc_stderr": 0.013837060648682103
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7049706039551042,
40
+ "acc_stderr": 0.010546232606962287
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5091743119266054,
44
+ "acc_stderr": 0.00874358274491014
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.5753367003367004,
48
+ "acc_stderr": 0.01014265368748041,
49
+ "acc_norm": 0.515993265993266,
50
+ "acc_norm_stderr": 0.01025453358928818
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.2713310580204778,
54
+ "acc_stderr": 0.012993807727545797,
55
+ "acc_norm": 0.2909556313993174,
56
+ "acc_norm_stderr": 0.013273077865907573
57
+ },
58
+ "sciq": {
59
+ "acc": 0.842,
60
+ "acc_stderr": 0.01153989467755957,
61
+ "acc_norm": 0.751,
62
+ "acc_norm_stderr": 0.013681600278702301
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7388465723612623,
66
+ "acc_stderr": 0.01024873864993558,
67
+ "acc_norm": 0.7437431991294886,
68
+ "acc_norm_stderr": 0.010185787831565058
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b12bc4opt4/evaluation/rankeval/4b284b12bc4opt4_0_lm-eval_global_step109672_2023-05-15-10-12-47_0shots_backup.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.345,
5
+ "acc_stderr": 0.015039986742055237
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.338,
9
+ "acc_stderr": 0.01496596071022449
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3441666666666667,
13
+ "acc_stderr": 0.013720551062295756
14
+ },
15
+ "cb": {
16
+ "acc": 0.375,
17
+ "acc_stderr": 0.06527912098338669,
18
+ "f1": 0.1986111111111111
19
+ },
20
+ "copa": {
21
+ "acc": 0.8,
22
+ "acc_stderr": 0.040201512610368445
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4736108344951205,
26
+ "acc_stderr": 0.004982826916687145,
27
+ "acc_norm": 0.6177056363274248,
28
+ "acc_norm_stderr": 0.0048495478191344825
29
+ },
30
+ "rte": {
31
+ "acc": 0.5523465703971119,
32
+ "acc_stderr": 0.02993107036293953
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5872138910812944,
36
+ "acc_stderr": 0.013837060648682103
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7049706039551042,
40
+ "acc_stderr": 0.010546232606962287
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5091743119266054,
44
+ "acc_stderr": 0.00874358274491014
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.5753367003367004,
48
+ "acc_stderr": 0.01014265368748041,
49
+ "acc_norm": 0.515993265993266,
50
+ "acc_norm_stderr": 0.01025453358928818
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.2713310580204778,
54
+ "acc_stderr": 0.012993807727545797,
55
+ "acc_norm": 0.2909556313993174,
56
+ "acc_norm_stderr": 0.013273077865907573
57
+ },
58
+ "sciq": {
59
+ "acc": 0.842,
60
+ "acc_stderr": 0.01153989467755957,
61
+ "acc_norm": 0.751,
62
+ "acc_norm_stderr": 0.013681600278702301
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7388465723612623,
66
+ "acc_stderr": 0.01024873864993558,
67
+ "acc_norm": 0.7437431991294886,
68
+ "acc_norm_stderr": 0.010185787831565058
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b12bc4opt4/evaluation/rankeval/4b284b12bc4opt4_1.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.327,
5
+ "acc_stderr": 0.014842213153411239
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.318,
9
+ "acc_stderr": 0.014734079309311901
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.34,
13
+ "acc_stderr": 0.013680495725767784
14
+ },
15
+ "cb": {
16
+ "acc": 0.42857142857142855,
17
+ "acc_stderr": 0.06672848092813057,
18
+ "f1": 0.2385663082437276
19
+ },
20
+ "copa": {
21
+ "acc": 0.82,
22
+ "acc_stderr": 0.038612291966536955
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4735112527384983,
26
+ "acc_stderr": 0.004982774293927781,
27
+ "acc_norm": 0.6200955984863573,
28
+ "acc_norm_stderr": 0.0048437085503865145
29
+ },
30
+ "rte": {
31
+ "acc": 0.5415162454873647,
32
+ "acc_stderr": 0.029992535385373314
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5872138910812944,
36
+ "acc_stderr": 0.013837060648682106
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.703901656867985,
40
+ "acc_stderr": 0.010557307688475123
41
+ },
42
+ "boolq": {
43
+ "acc": 0.536085626911315,
44
+ "acc_stderr": 0.008722250102078083
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.5934343434343434,
48
+ "acc_stderr": 0.01007905641922353,
49
+ "acc_norm": 0.5534511784511784,
50
+ "acc_norm_stderr": 0.010200990076245307
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.28242320819112626,
54
+ "acc_stderr": 0.013155456884097224,
55
+ "acc_norm": 0.310580204778157,
56
+ "acc_norm_stderr": 0.013522292098053054
57
+ },
58
+ "sciq": {
59
+ "acc": 0.85,
60
+ "acc_stderr": 0.0112972398234093,
61
+ "acc_norm": 0.776,
62
+ "acc_norm_stderr": 0.013190830072364464
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7404787812840044,
66
+ "acc_stderr": 0.010227939888173922,
67
+ "acc_norm": 0.7578890097932536,
68
+ "acc_norm_stderr": 0.009994371269104397
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b12bc4opt4/evaluation/rankeval/4b284b12bc4opt4_1_lm-eval_global_step109672_2023-05-15-10-12-47_1shots_backup.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.327,
5
+ "acc_stderr": 0.014842213153411239
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.318,
9
+ "acc_stderr": 0.014734079309311901
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.34,
13
+ "acc_stderr": 0.013680495725767784
14
+ },
15
+ "cb": {
16
+ "acc": 0.42857142857142855,
17
+ "acc_stderr": 0.06672848092813057,
18
+ "f1": 0.2385663082437276
19
+ },
20
+ "copa": {
21
+ "acc": 0.82,
22
+ "acc_stderr": 0.038612291966536955
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4735112527384983,
26
+ "acc_stderr": 0.004982774293927781,
27
+ "acc_norm": 0.6200955984863573,
28
+ "acc_norm_stderr": 0.0048437085503865145
29
+ },
30
+ "rte": {
31
+ "acc": 0.5415162454873647,
32
+ "acc_stderr": 0.029992535385373314
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5872138910812944,
36
+ "acc_stderr": 0.013837060648682106
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.703901656867985,
40
+ "acc_stderr": 0.010557307688475123
41
+ },
42
+ "boolq": {
43
+ "acc": 0.536085626911315,
44
+ "acc_stderr": 0.008722250102078083
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.5934343434343434,
48
+ "acc_stderr": 0.01007905641922353,
49
+ "acc_norm": 0.5534511784511784,
50
+ "acc_norm_stderr": 0.010200990076245307
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.28242320819112626,
54
+ "acc_stderr": 0.013155456884097224,
55
+ "acc_norm": 0.310580204778157,
56
+ "acc_norm_stderr": 0.013522292098053054
57
+ },
58
+ "sciq": {
59
+ "acc": 0.85,
60
+ "acc_stderr": 0.0112972398234093,
61
+ "acc_norm": 0.776,
62
+ "acc_norm_stderr": 0.013190830072364464
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7404787812840044,
66
+ "acc_stderr": 0.010227939888173922,
67
+ "acc_norm": 0.7578890097932536,
68
+ "acc_norm_stderr": 0.009994371269104397
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b12bc4opt4/evaluation/rankeval/4b284b12bc4opt4_2.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.334,
5
+ "acc_stderr": 0.014922019523732963
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.325,
9
+ "acc_stderr": 0.014818724459095526
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.35083333333333333,
13
+ "acc_stderr": 0.013782212417178197
14
+ },
15
+ "cb": {
16
+ "acc": 0.4107142857142857,
17
+ "acc_stderr": 0.06633634150359541,
18
+ "f1": 0.23484848484848486
19
+ },
20
+ "copa": {
21
+ "acc": 0.82,
22
+ "acc_stderr": 0.03861229196653694
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4720175263891655,
26
+ "acc_stderr": 0.004981961097590806,
27
+ "acc_norm": 0.6194981079466242,
28
+ "acc_norm_stderr": 0.0048451800342716195
29
+ },
30
+ "rte": {
31
+ "acc": 0.5018050541516246,
32
+ "acc_stderr": 0.030096267148976626
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.584846093133386,
36
+ "acc_stderr": 0.013848684086658588
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7135221806520577,
40
+ "acc_stderr": 0.01045510591863303
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5461773700305811,
44
+ "acc_stderr": 0.008707680082127857
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.609006734006734,
48
+ "acc_stderr": 0.010012992232540636,
49
+ "acc_norm": 0.5614478114478114,
50
+ "acc_norm_stderr": 0.010182010275471116
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.2841296928327645,
54
+ "acc_stderr": 0.013179442447653886,
55
+ "acc_norm": 0.30887372013651876,
56
+ "acc_norm_stderr": 0.013501770929344003
57
+ },
58
+ "sciq": {
59
+ "acc": 0.853,
60
+ "acc_stderr": 0.011203415395160336,
61
+ "acc_norm": 0.783,
62
+ "acc_norm_stderr": 0.01304151375727071
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7464635473340587,
66
+ "acc_stderr": 0.010150090834551786,
67
+ "acc_norm": 0.7557127312295974,
68
+ "acc_norm_stderr": 0.010024765172284247
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b12bc4opt4/evaluation/rankeval/4b284b12bc4opt4_2_lm-eval_global_step109672_2023-05-15-10-12-47_2shots_backup.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.334,
5
+ "acc_stderr": 0.014922019523732963
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.325,
9
+ "acc_stderr": 0.014818724459095526
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.35083333333333333,
13
+ "acc_stderr": 0.013782212417178197
14
+ },
15
+ "cb": {
16
+ "acc": 0.4107142857142857,
17
+ "acc_stderr": 0.06633634150359541,
18
+ "f1": 0.23484848484848486
19
+ },
20
+ "copa": {
21
+ "acc": 0.82,
22
+ "acc_stderr": 0.03861229196653694
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4720175263891655,
26
+ "acc_stderr": 0.004981961097590806,
27
+ "acc_norm": 0.6194981079466242,
28
+ "acc_norm_stderr": 0.0048451800342716195
29
+ },
30
+ "rte": {
31
+ "acc": 0.5018050541516246,
32
+ "acc_stderr": 0.030096267148976626
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.584846093133386,
36
+ "acc_stderr": 0.013848684086658588
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7135221806520577,
40
+ "acc_stderr": 0.01045510591863303
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5461773700305811,
44
+ "acc_stderr": 0.008707680082127857
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.609006734006734,
48
+ "acc_stderr": 0.010012992232540636,
49
+ "acc_norm": 0.5614478114478114,
50
+ "acc_norm_stderr": 0.010182010275471116
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.2841296928327645,
54
+ "acc_stderr": 0.013179442447653886,
55
+ "acc_norm": 0.30887372013651876,
56
+ "acc_norm_stderr": 0.013501770929344003
57
+ },
58
+ "sciq": {
59
+ "acc": 0.853,
60
+ "acc_stderr": 0.011203415395160336,
61
+ "acc_norm": 0.783,
62
+ "acc_norm_stderr": 0.01304151375727071
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7464635473340587,
66
+ "acc_stderr": 0.010150090834551786,
67
+ "acc_norm": 0.7557127312295974,
68
+ "acc_norm_stderr": 0.010024765172284247
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
4b284b12bc4opt4/evaluation/rankeval/4b284b12bc4opt4_3.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.329,
5
+ "acc_stderr": 0.014865395385928367
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.325,
9
+ "acc_stderr": 0.014818724459095526
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3458333333333333,
13
+ "acc_stderr": 0.013736245342311012
14
+ },
15
+ "cb": {
16
+ "acc": 0.44642857142857145,
17
+ "acc_stderr": 0.06703189227942398,
18
+ "f1": 0.3862433862433863
19
+ },
20
+ "copa": {
21
+ "acc": 0.85,
22
+ "acc_stderr": 0.03588702812826371
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4733120892252539,
26
+ "acc_stderr": 0.004982668452118941,
27
+ "acc_norm": 0.6216889065923122,
28
+ "acc_norm_stderr": 0.004839746491523517
29
+ },
30
+ "rte": {
31
+ "acc": 0.516245487364621,
32
+ "acc_stderr": 0.030080573208738064
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5785319652722968,
36
+ "acc_stderr": 0.0138780723774976
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.711918760021379,
40
+ "acc_stderr": 0.010472537019822582
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5645259938837921,
44
+ "acc_stderr": 0.008671927333703594
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.5997474747474747,
48
+ "acc_stderr": 0.010053550119896129,
49
+ "acc_norm": 0.5749158249158249,
50
+ "acc_norm_stderr": 0.010143966195717845
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.2781569965870307,
54
+ "acc_stderr": 0.013094469919538812,
55
+ "acc_norm": 0.3037542662116041,
56
+ "acc_norm_stderr": 0.01343890918477876
57
+ }
58
+ },
59
+ "versions": {
60
+ "anli_r1": 0,
61
+ "anli_r2": 0,
62
+ "anli_r3": 0,
63
+ "cb": 1,
64
+ "copa": 0,
65
+ "hellaswag": 0,
66
+ "rte": 0,
67
+ "winogrande": 0,
68
+ "storycloze_2016": 0,
69
+ "boolq": 1,
70
+ "arc_easy": 0,
71
+ "arc_challenge": 0
72
+ }
73
+ }
4b284b12bc4opt4/evaluation/rankeval/4b284b12bc4opt4_3_lm-eval_global_step109672_2023-05-15-10-12-47_3shots_backup.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.329,
5
+ "acc_stderr": 0.014865395385928367
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.325,
9
+ "acc_stderr": 0.014818724459095526
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3458333333333333,
13
+ "acc_stderr": 0.013736245342311012
14
+ },
15
+ "cb": {
16
+ "acc": 0.44642857142857145,
17
+ "acc_stderr": 0.06703189227942398,
18
+ "f1": 0.3862433862433863
19
+ },
20
+ "copa": {
21
+ "acc": 0.85,
22
+ "acc_stderr": 0.03588702812826371
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4733120892252539,
26
+ "acc_stderr": 0.004982668452118941,
27
+ "acc_norm": 0.6216889065923122,
28
+ "acc_norm_stderr": 0.004839746491523517
29
+ },
30
+ "rte": {
31
+ "acc": 0.516245487364621,
32
+ "acc_stderr": 0.030080573208738064
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5785319652722968,
36
+ "acc_stderr": 0.0138780723774976
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.711918760021379,
40
+ "acc_stderr": 0.010472537019822582
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5645259938837921,
44
+ "acc_stderr": 0.008671927333703594
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.5997474747474747,
48
+ "acc_stderr": 0.010053550119896129,
49
+ "acc_norm": 0.5749158249158249,
50
+ "acc_norm_stderr": 0.010143966195717845
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.2781569965870307,
54
+ "acc_stderr": 0.013094469919538812,
55
+ "acc_norm": 0.3037542662116041,
56
+ "acc_norm_stderr": 0.01343890918477876
57
+ }
58
+ },
59
+ "versions": {
60
+ "anli_r1": 0,
61
+ "anli_r2": 0,
62
+ "anli_r3": 0,
63
+ "cb": 1,
64
+ "copa": 0,
65
+ "hellaswag": 0,
66
+ "rte": 0,
67
+ "winogrande": 0,
68
+ "storycloze_2016": 0,
69
+ "boolq": 1,
70
+ "arc_easy": 0,
71
+ "arc_challenge": 0
72
+ }
73
+ }
4b284b12bc4opt4/evaluation/rankeval/4b284b12bc4opt4_4.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.342,
5
+ "acc_stderr": 0.015008706182121731
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.318,
9
+ "acc_stderr": 0.014734079309311901
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3275,
13
+ "acc_stderr": 0.013553211167251944
14
+ },
15
+ "cb": {
16
+ "acc": 0.4642857142857143,
17
+ "acc_stderr": 0.0672477765493766,
18
+ "f1": 0.3282312925170068
19
+ },
20
+ "copa": {
21
+ "acc": 0.8,
22
+ "acc_stderr": 0.040201512610368445
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.47161919936267677,
26
+ "acc_stderr": 0.004981736689518751,
27
+ "acc_norm": 0.622087233618801,
28
+ "acc_norm_stderr": 0.004838747305783333
29
+ },
30
+ "rte": {
31
+ "acc": 0.5126353790613718,
32
+ "acc_stderr": 0.030086851767188564
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5864246250986582,
36
+ "acc_stderr": 0.013840971763195306
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7076429716729022,
40
+ "acc_stderr": 0.01051823972978774
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5581039755351682,
44
+ "acc_stderr": 0.008685806399014942
45
+ }
46
+ },
47
+ "versions": {
48
+ "anli_r1": 0,
49
+ "anli_r2": 0,
50
+ "anli_r3": 0,
51
+ "cb": 1,
52
+ "copa": 0,
53
+ "hellaswag": 0,
54
+ "rte": 0,
55
+ "winogrande": 0,
56
+ "storycloze_2016": 0,
57
+ "boolq": 1
58
+ }
59
+ }
4b284b12bc4opt4/evaluation/rankeval/4b284b12bc4opt4_4_lm-eval_global_step109672_2023-05-15-10-12-47_4shots_backup.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.342,
5
+ "acc_stderr": 0.015008706182121731
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.318,
9
+ "acc_stderr": 0.014734079309311901
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3275,
13
+ "acc_stderr": 0.013553211167251944
14
+ },
15
+ "cb": {
16
+ "acc": 0.4642857142857143,
17
+ "acc_stderr": 0.0672477765493766,
18
+ "f1": 0.3282312925170068
19
+ },
20
+ "copa": {
21
+ "acc": 0.8,
22
+ "acc_stderr": 0.040201512610368445
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.47161919936267677,
26
+ "acc_stderr": 0.004981736689518751,
27
+ "acc_norm": 0.622087233618801,
28
+ "acc_norm_stderr": 0.004838747305783333
29
+ },
30
+ "rte": {
31
+ "acc": 0.5126353790613718,
32
+ "acc_stderr": 0.030086851767188564
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5864246250986582,
36
+ "acc_stderr": 0.013840971763195306
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7076429716729022,
40
+ "acc_stderr": 0.01051823972978774
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5581039755351682,
44
+ "acc_stderr": 0.008685806399014942
45
+ }
46
+ },
47
+ "versions": {
48
+ "anli_r1": 0,
49
+ "anli_r2": 0,
50
+ "anli_r3": 0,
51
+ "cb": 1,
52
+ "copa": 0,
53
+ "hellaswag": 0,
54
+ "rte": 0,
55
+ "winogrande": 0,
56
+ "storycloze_2016": 0,
57
+ "boolq": 1
58
+ }
59
+ }
4b284b12bc4opt4/evaluation/rankeval/4b284b12bc4opt4_5.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.318,
5
+ "acc_stderr": 0.014734079309311901
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.322,
9
+ "acc_stderr": 0.014782913600996662
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3525,
13
+ "acc_stderr": 0.01379716491891836
14
+ },
15
+ "cb": {
16
+ "acc": 0.4642857142857143,
17
+ "acc_stderr": 0.06724777654937658,
18
+ "f1": 0.32575201760821715
19
+ },
20
+ "copa": {
21
+ "acc": 0.76,
22
+ "acc_stderr": 0.04292346959909283
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4700258912567218,
26
+ "acc_stderr": 0.0049808072311367515,
27
+ "acc_norm": 0.6182035451105358,
28
+ "acc_norm_stderr": 0.0048483415604921335
29
+ },
30
+ "rte": {
31
+ "acc": 0.5090252707581228,
32
+ "acc_stderr": 0.030091559826331334
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5714285714285714,
36
+ "acc_stderr": 0.013908353814606696
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7071084981293426,
40
+ "acc_stderr": 0.010523873293246305
41
+ }
42
+ },
43
+ "versions": {
44
+ "anli_r1": 0,
45
+ "anli_r2": 0,
46
+ "anli_r3": 0,
47
+ "cb": 1,
48
+ "copa": 0,
49
+ "hellaswag": 0,
50
+ "rte": 0,
51
+ "winogrande": 0,
52
+ "storycloze_2016": 0
53
+ }
54
+ }
4b284b12bc4opt4/evaluation/rankeval/4b284b12bc4opt4_5_lm-eval_global_step109672_2023-05-15-10-12-47_5shots_backup.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.318,
5
+ "acc_stderr": 0.014734079309311901
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.322,
9
+ "acc_stderr": 0.014782913600996662
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3525,
13
+ "acc_stderr": 0.01379716491891836
14
+ },
15
+ "cb": {
16
+ "acc": 0.4642857142857143,
17
+ "acc_stderr": 0.06724777654937658,
18
+ "f1": 0.32575201760821715
19
+ },
20
+ "copa": {
21
+ "acc": 0.76,
22
+ "acc_stderr": 0.04292346959909283
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4700258912567218,
26
+ "acc_stderr": 0.0049808072311367515,
27
+ "acc_norm": 0.6182035451105358,
28
+ "acc_norm_stderr": 0.0048483415604921335
29
+ },
30
+ "rte": {
31
+ "acc": 0.5090252707581228,
32
+ "acc_stderr": 0.030091559826331334
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5714285714285714,
36
+ "acc_stderr": 0.013908353814606696
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.7071084981293426,
40
+ "acc_stderr": 0.010523873293246305
41
+ }
42
+ },
43
+ "versions": {
44
+ "anli_r1": 0,
45
+ "anli_r2": 0,
46
+ "anli_r3": 0,
47
+ "cb": 1,
48
+ "copa": 0,
49
+ "hellaswag": 0,
50
+ "rte": 0,
51
+ "winogrande": 0,
52
+ "storycloze_2016": 0
53
+ }
54
+ }
4b284b12bc4opt4/global_step109672/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85e351a939564871527f6cded6dc4b0b5a90880bdbd18210e3b397f356738cac
3
+ size 144841431
4b284b12bc4opt4/global_step109672/bf16_zero_pp_rank_100_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70d86aaa19b5745c2eee7b5eaf907296c3af660dc506229a253c4e7d50f5462b
3
+ size 144841517
4b284b12bc4opt4/global_step109672/bf16_zero_pp_rank_101_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c627d8aa7d8f95f387abcc9dc00ae33933a691e765f13e081ef49d7b979630b6
3
+ size 144841453
4b284b12bc4opt4/global_step109672/bf16_zero_pp_rank_102_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b28411b3f69b7c8746444f22718f486bfe034aaf2df7225ffc54b5a5c7db1b7
3
+ size 144841517
4b284b12bc4opt4/global_step109672/bf16_zero_pp_rank_103_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5866c859b6d1c9bfb0b837c71b6f67c0012caca102fea7880577b314e97c018
3
+ size 144841453
4b284b12bc4opt4/global_step109672/bf16_zero_pp_rank_104_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd10a89dafa7bf3018e8adfed0d93aff4563d140882456ddfd67332cb97b00b7
3
+ size 144841453
4b284b12bc4opt4/global_step109672/bf16_zero_pp_rank_105_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:812fa3ed497b16432064603e4f24b77f05ea977419af6300e8f61183eb9fd012
3
+ size 144841389
4b284b12bc4opt4/global_step109672/bf16_zero_pp_rank_106_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a58a5b379206d43b19996d9c1168c23435a9c995e631a838469e8512cd02fba7
3
+ size 144841517
4b284b12bc4opt4/global_step109672/bf16_zero_pp_rank_107_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2242424ab63926bff3f0a1b9d82e520b4bc5820b8d2fadff720a6d21abc32985
3
+ size 144841453
4b284b12bc4opt4/global_step109672/bf16_zero_pp_rank_108_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a8c5ba4914469dd60ea5b5cff83783bbe4eb93b702624339475f676437cc361
3
+ size 144841581
4b284b12bc4opt4/global_step109672/bf16_zero_pp_rank_109_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18cb99f24d9b65b8167524863145257df8f99b10b6c84a3f5fbfb1765a36ce7d
3
+ size 144841453