Seonghyeon Lee commited on
Commit
d0d9004
1 Parent(s): 43df447

feat: upload checkpoints

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. 20211029_101219/answers-students.wa +0 -0
  2. 20211029_101219/answers-students.wa.untrained +0 -0
  3. 20211029_101219/checkpoint-2000/optimizer.pt +3 -0
  4. 20211029_101219/checkpoint-2000/pytorch_model.bin +3 -0
  5. 20211029_101219/checkpoint-2000/rng_state.pth +3 -0
  6. 20211029_101219/checkpoint-2000/scaler.pt +3 -0
  7. 20211029_101219/checkpoint-2000/scheduler.pt +3 -0
  8. 20211029_101219/checkpoint-2000/special_tokens_map.json +1 -0
  9. 20211029_101219/checkpoint-2000/tokenizer_config.json +1 -0
  10. 20211029_101219/checkpoint-2000/trainer_state.json +80 -0
  11. 20211029_101219/checkpoint-2000/training_args.bin +3 -0
  12. 20211029_101219/checkpoint-2000/vocab.txt +0 -0
  13. 20211029_101219/checkpoint-6250/optimizer.pt +3 -0
  14. 20211029_101219/checkpoint-6250/pytorch_model.bin +3 -0
  15. 20211029_101219/checkpoint-6250/rng_state.pth +3 -0
  16. 20211029_101219/checkpoint-6250/scaler.pt +3 -0
  17. 20211029_101219/checkpoint-6250/scheduler.pt +3 -0
  18. 20211029_101219/checkpoint-6250/special_tokens_map.json +1 -0
  19. 20211029_101219/checkpoint-6250/tokenizer_config.json +1 -0
  20. 20211029_101219/checkpoint-6250/trainer_state.json +213 -0
  21. 20211029_101219/checkpoint-6250/training_args.bin +3 -0
  22. 20211029_101219/checkpoint-6250/vocab.txt +0 -0
  23. 20211029_101219/data_args.json +1 -0
  24. 20211029_101219/eval_results.txt +1 -0
  25. 20211029_101219/headlines.wa +0 -0
  26. 20211029_101219/headlines.wa.untrained +0 -0
  27. 20211029_101219/images.wa +0 -0
  28. 20211029_101219/images.wa.untrained +0 -0
  29. 20211029_101219/model_args.json +1 -0
  30. 20211029_101219/train_results.txt +5 -0
  31. 20211029_101219/training_args.json +1 -0
  32. 20211030_161510/data_args.json +1 -0
  33. 20211030_161510/model_args.json +1 -0
  34. 20211030_161510/training_args.json +1 -0
  35. 20211030_161612/answers-students.wa +0 -0
  36. 20211030_161612/answers-students.wa.untrained +0 -0
  37. 20211030_161612/checkpoint-2000/optimizer.pt +3 -0
  38. 20211030_161612/checkpoint-2000/pytorch_model.bin +3 -0
  39. 20211030_161612/checkpoint-2000/rng_state.pth +3 -0
  40. 20211030_161612/checkpoint-2000/scaler.pt +3 -0
  41. 20211030_161612/checkpoint-2000/scheduler.pt +3 -0
  42. 20211030_161612/checkpoint-2000/special_tokens_map.json +1 -0
  43. 20211030_161612/checkpoint-2000/tokenizer_config.json +1 -0
  44. 20211030_161612/checkpoint-2000/trainer_state.json +80 -0
  45. 20211030_161612/checkpoint-2000/training_args.bin +3 -0
  46. 20211030_161612/checkpoint-2000/vocab.txt +0 -0
  47. 20211030_161612/checkpoint-6250/optimizer.pt +3 -0
  48. 20211030_161612/checkpoint-6250/pytorch_model.bin +3 -0
  49. 20211030_161612/checkpoint-6250/rng_state.pth +3 -0
  50. 20211030_161612/checkpoint-6250/scaler.pt +3 -0
20211029_101219/answers-students.wa ADDED
The diff for this file is too large to render. See raw diff
 
20211029_101219/answers-students.wa.untrained ADDED
The diff for this file is too large to render. See raw diff
 
20211029_101219/checkpoint-2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2ec3f0d2b7c3b5a65a416c246e16d59d9af3fd850b48cc3db8f5d5ca10e7747
3
+ size 875973285
20211029_101219/checkpoint-2000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9118661b7a1faa0ea976e7a43c52c17a06c19e5a040b890f51544901ac932669
3
+ size 440387437
20211029_101219/checkpoint-2000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2f52cc3028827f4c99031990038e86f715b12cdf8538aabc1bac0dee99261b0
3
+ size 14503
20211029_101219/checkpoint-2000/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3914f16c24b1fd0d20aa37bebdd55cb46873ae864e0847fd6dcc5768e6d7497
3
+ size 559
20211029_101219/checkpoint-2000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:512941103bf1e720143f9a916d47d84f8a4565c8eb359bc4b6aa9d12fdcb5f07
3
+ size 623
20211029_101219/checkpoint-2000/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
20211029_101219/checkpoint-2000/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "do_basic_tokenize": true, "never_split": null, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "tokenizer_file": "/home/sh0416/.cache/huggingface/transformers/534479488c54aeaf9c3406f647aa2ec13648c06771ffe269edabebd4c412da1d.7f2721073f19841be16f41b0a70b600ca6b880c8f3df6f3535cbc704371bdfa4", "name_or_path": "bert-base-uncased", "tokenizer_class": "BertTokenizer"}
20211029_101219/checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8695299721915639,
3
+ "best_model_checkpoint": "/home/sh0416/checkpoints/20211029_101219/checkpoint-2000",
4
+ "epoch": 0.9289363678588016,
5
+ "global_step": 2000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.12,
12
+ "eval_stsb_spearman": 0.8607327167766351,
13
+ "step": 250
14
+ },
15
+ {
16
+ "epoch": 0.23,
17
+ "learning_rate": 3.691593125870878e-05,
18
+ "loss": 0.6096,
19
+ "step": 500
20
+ },
21
+ {
22
+ "epoch": 0.23,
23
+ "eval_stsb_spearman": 0.8686592794214189,
24
+ "step": 500
25
+ },
26
+ {
27
+ "epoch": 0.35,
28
+ "eval_stsb_spearman": 0.859845431717547,
29
+ "step": 750
30
+ },
31
+ {
32
+ "epoch": 0.46,
33
+ "learning_rate": 3.381947669917944e-05,
34
+ "loss": 0.4635,
35
+ "step": 1000
36
+ },
37
+ {
38
+ "epoch": 0.46,
39
+ "eval_stsb_spearman": 0.8640662264378054,
40
+ "step": 1000
41
+ },
42
+ {
43
+ "epoch": 0.58,
44
+ "eval_stsb_spearman": 0.8658685932167778,
45
+ "step": 1250
46
+ },
47
+ {
48
+ "epoch": 0.7,
49
+ "learning_rate": 3.07230221396501e-05,
50
+ "loss": 0.4223,
51
+ "step": 1500
52
+ },
53
+ {
54
+ "epoch": 0.7,
55
+ "eval_stsb_spearman": 0.8684476529412299,
56
+ "step": 1500
57
+ },
58
+ {
59
+ "epoch": 0.81,
60
+ "eval_stsb_spearman": 0.8683877027220283,
61
+ "step": 1750
62
+ },
63
+ {
64
+ "epoch": 0.93,
65
+ "learning_rate": 2.7626567580120764e-05,
66
+ "loss": 0.4018,
67
+ "step": 2000
68
+ },
69
+ {
70
+ "epoch": 0.93,
71
+ "eval_stsb_spearman": 0.8695299721915639,
72
+ "step": 2000
73
+ }
74
+ ],
75
+ "max_steps": 6459,
76
+ "num_train_epochs": 3,
77
+ "total_flos": 0.0,
78
+ "trial_name": null,
79
+ "trial_params": null
80
+ }
20211029_101219/checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f21801a0f477016c386d7845cd97a476e091b21f5e754e8d2b4a45d97a33731b
3
+ size 2735
20211029_101219/checkpoint-2000/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
20211029_101219/checkpoint-6250/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c15b19a12e17a8dd82530b2358f9f1db502d9d4774cbb17b59c9fc81202b6a8
3
+ size 875973285
20211029_101219/checkpoint-6250/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4da87d8301f29c03be0b1adf174e50c1b1448248f99bda442ff4cb3a0aa6bafe
3
+ size 440387437
20211029_101219/checkpoint-6250/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b428c2f579b4b0f614f175c697132b1e59f00400df624235d7967af5f7538b9
3
+ size 14503
20211029_101219/checkpoint-6250/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9e6be2d9047bd8c9b6bd066381ccb3d9565dd7267a91985c6cdffa400973ba9
3
+ size 559
20211029_101219/checkpoint-6250/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43193f69601b44b72d776af0fbc138141b555844e67a165b109ba933b2b63767
3
+ size 623
20211029_101219/checkpoint-6250/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
20211029_101219/checkpoint-6250/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "do_basic_tokenize": true, "never_split": null, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "tokenizer_file": "/home/sh0416/.cache/huggingface/transformers/534479488c54aeaf9c3406f647aa2ec13648c06771ffe269edabebd4c412da1d.7f2721073f19841be16f41b0a70b600ca6b880c8f3df6f3535cbc704371bdfa4", "name_or_path": "bert-base-uncased", "tokenizer_class": "BertTokenizer"}
20211029_101219/checkpoint-6250/trainer_state.json ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8695299721915639,
3
+ "best_model_checkpoint": "/home/sh0416/checkpoints/20211029_101219/checkpoint-2000",
4
+ "epoch": 2.9029261495587555,
5
+ "global_step": 6250,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.12,
12
+ "eval_stsb_spearman": 0.8607327167766351,
13
+ "step": 250
14
+ },
15
+ {
16
+ "epoch": 0.23,
17
+ "learning_rate": 3.691593125870878e-05,
18
+ "loss": 0.6096,
19
+ "step": 500
20
+ },
21
+ {
22
+ "epoch": 0.23,
23
+ "eval_stsb_spearman": 0.8686592794214189,
24
+ "step": 500
25
+ },
26
+ {
27
+ "epoch": 0.35,
28
+ "eval_stsb_spearman": 0.859845431717547,
29
+ "step": 750
30
+ },
31
+ {
32
+ "epoch": 0.46,
33
+ "learning_rate": 3.381947669917944e-05,
34
+ "loss": 0.4635,
35
+ "step": 1000
36
+ },
37
+ {
38
+ "epoch": 0.46,
39
+ "eval_stsb_spearman": 0.8640662264378054,
40
+ "step": 1000
41
+ },
42
+ {
43
+ "epoch": 0.58,
44
+ "eval_stsb_spearman": 0.8658685932167778,
45
+ "step": 1250
46
+ },
47
+ {
48
+ "epoch": 0.7,
49
+ "learning_rate": 3.07230221396501e-05,
50
+ "loss": 0.4223,
51
+ "step": 1500
52
+ },
53
+ {
54
+ "epoch": 0.7,
55
+ "eval_stsb_spearman": 0.8684476529412299,
56
+ "step": 1500
57
+ },
58
+ {
59
+ "epoch": 0.81,
60
+ "eval_stsb_spearman": 0.8683877027220283,
61
+ "step": 1750
62
+ },
63
+ {
64
+ "epoch": 0.93,
65
+ "learning_rate": 2.7626567580120764e-05,
66
+ "loss": 0.4018,
67
+ "step": 2000
68
+ },
69
+ {
70
+ "epoch": 0.93,
71
+ "eval_stsb_spearman": 0.8695299721915639,
72
+ "step": 2000
73
+ },
74
+ {
75
+ "epoch": 1.05,
76
+ "eval_stsb_spearman": 0.8641507442895762,
77
+ "step": 2250
78
+ },
79
+ {
80
+ "epoch": 1.16,
81
+ "learning_rate": 2.4530113020591425e-05,
82
+ "loss": 0.329,
83
+ "step": 2500
84
+ },
85
+ {
86
+ "epoch": 1.16,
87
+ "eval_stsb_spearman": 0.867926581408182,
88
+ "step": 2500
89
+ },
90
+ {
91
+ "epoch": 1.28,
92
+ "eval_stsb_spearman": 0.8668693608104431,
93
+ "step": 2750
94
+ },
95
+ {
96
+ "epoch": 1.39,
97
+ "learning_rate": 2.1433658461062086e-05,
98
+ "loss": 0.2898,
99
+ "step": 3000
100
+ },
101
+ {
102
+ "epoch": 1.39,
103
+ "eval_stsb_spearman": 0.8683401493085128,
104
+ "step": 3000
105
+ },
106
+ {
107
+ "epoch": 1.51,
108
+ "eval_stsb_spearman": 0.8616608403167029,
109
+ "step": 3250
110
+ },
111
+ {
112
+ "epoch": 1.63,
113
+ "learning_rate": 1.8337203901532747e-05,
114
+ "loss": 0.292,
115
+ "step": 3500
116
+ },
117
+ {
118
+ "epoch": 1.63,
119
+ "eval_stsb_spearman": 0.8661633097318306,
120
+ "step": 3500
121
+ },
122
+ {
123
+ "epoch": 1.74,
124
+ "eval_stsb_spearman": 0.8662656674535325,
125
+ "step": 3750
126
+ },
127
+ {
128
+ "epoch": 1.86,
129
+ "learning_rate": 1.5240749342003407e-05,
130
+ "loss": 0.2826,
131
+ "step": 4000
132
+ },
133
+ {
134
+ "epoch": 1.86,
135
+ "eval_stsb_spearman": 0.8662364297692312,
136
+ "step": 4000
137
+ },
138
+ {
139
+ "epoch": 1.97,
140
+ "eval_stsb_spearman": 0.8647880098527821,
141
+ "step": 4250
142
+ },
143
+ {
144
+ "epoch": 2.09,
145
+ "learning_rate": 1.2144294782474068e-05,
146
+ "loss": 0.251,
147
+ "step": 4500
148
+ },
149
+ {
150
+ "epoch": 2.09,
151
+ "eval_stsb_spearman": 0.8644350558731776,
152
+ "step": 4500
153
+ },
154
+ {
155
+ "epoch": 2.21,
156
+ "eval_stsb_spearman": 0.8650629299708443,
157
+ "step": 4750
158
+ },
159
+ {
160
+ "epoch": 2.32,
161
+ "learning_rate": 9.04784022294473e-06,
162
+ "loss": 0.2152,
163
+ "step": 5000
164
+ },
165
+ {
166
+ "epoch": 2.32,
167
+ "eval_stsb_spearman": 0.8626686418459087,
168
+ "step": 5000
169
+ },
170
+ {
171
+ "epoch": 2.44,
172
+ "eval_stsb_spearman": 0.8643429650624574,
173
+ "step": 5250
174
+ },
175
+ {
176
+ "epoch": 2.55,
177
+ "learning_rate": 5.95138566341539e-06,
178
+ "loss": 0.2165,
179
+ "step": 5500
180
+ },
181
+ {
182
+ "epoch": 2.55,
183
+ "eval_stsb_spearman": 0.8630366314230514,
184
+ "step": 5500
185
+ },
186
+ {
187
+ "epoch": 2.67,
188
+ "eval_stsb_spearman": 0.8629383082790121,
189
+ "step": 5750
190
+ },
191
+ {
192
+ "epoch": 2.79,
193
+ "learning_rate": 2.8549311038860505e-06,
194
+ "loss": 0.2176,
195
+ "step": 6000
196
+ },
197
+ {
198
+ "epoch": 2.79,
199
+ "eval_stsb_spearman": 0.862915863423685,
200
+ "step": 6000
201
+ },
202
+ {
203
+ "epoch": 2.9,
204
+ "eval_stsb_spearman": 0.8627168589609486,
205
+ "step": 6250
206
+ }
207
+ ],
208
+ "max_steps": 6459,
209
+ "num_train_epochs": 3,
210
+ "total_flos": 0.0,
211
+ "trial_name": null,
212
+ "trial_params": null
213
+ }
20211029_101219/checkpoint-6250/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f21801a0f477016c386d7845cd97a476e091b21f5e754e8d2b4a45d97a33731b
3
+ size 2735
20211029_101219/checkpoint-6250/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
20211029_101219/data_args.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"data_type": "simcse-nli", "train_file": "/nas/home/sh0416/data/simcse/nli_for_simcse.csv", "max_seq_length": 32, "add_typo_corpus": false, "typo_corpus_filepath": null, "dup_rate": 0.08}
20211029_101219/eval_results.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 0.7523,0.8506,0.8099,0.8626,0.8150,0.8521,0.8049,0.8695
20211029_101219/headlines.wa ADDED
The diff for this file is too large to render. See raw diff
 
20211029_101219/headlines.wa.untrained ADDED
The diff for this file is too large to render. See raw diff
 
20211029_101219/images.wa ADDED
The diff for this file is too large to render. See raw diff
 
20211029_101219/images.wa.untrained ADDED
The diff for this file is too large to render. See raw diff
 
20211029_101219/model_args.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name_or_path": "bert-base-uncased", "loss_type": "rwmdcse", "temp": 0.05, "hidden_dropout_prob": 0.1, "mlp_only_train": true, "coeff_mlm": 0.1, "loss_rwmd": false, "layer_idx": 12}
20211029_101219/train_results.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ epoch = 3.0
2
+ train_loss = 0.3241952664897545
3
+ train_runtime = 7201.1171
4
+ train_samples_per_second = 114.816
5
+ train_steps_per_second = 0.897
20211029_101219/training_args.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/home/sh0416/checkpoints/20211029_101219", "overwrite_output_dir": false, "do_train": true, "do_eval": true, "do_predict": false, "evaluation_strategy": "steps", "prediction_loss_only": false, "per_device_train_batch_size": 128, "per_device_eval_batch_size": 128, "per_gpu_train_batch_size": null, "per_gpu_eval_batch_size": null, "gradient_accumulation_steps": 1, "eval_accumulation_steps": null, "learning_rate": 4e-05, "weight_decay": 0.0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_epsilon": 1e-08, "max_grad_norm": 1.0, "num_train_epochs": 3.0, "max_steps": -1, "lr_scheduler_type": "linear", "warmup_ratio": 0.0, "warmup_steps": 0, "log_level": -1, "log_level_replica": -1, "log_on_each_node": true, "logging_dir": "/home/sh0416/checkpoints/20211029_101219/runs/Oct29_10-12-20_jarvis", "logging_strategy": "steps", "logging_first_step": false, "logging_steps": 500, "save_strategy": "steps", "save_steps": 500, "save_total_limit": 1, "save_on_each_node": false, "no_cuda": false, "seed": 3, "fp16": true, "fp16_opt_level": "O1", "fp16_backend": "auto", "fp16_full_eval": false, "local_rank": -1, "tpu_num_cores": null, "tpu_metrics_debug": false, "debug": [], "dataloader_drop_last": true, "eval_steps": 250, "dataloader_num_workers": 0, "past_index": -1, "run_name": "/home/sh0416/checkpoints/20211029_101219", "disable_tqdm": false, "remove_unused_columns": true, "label_names": null, "load_best_model_at_end": true, "metric_for_best_model": "stsb_spearman", "greater_is_better": true, "ignore_data_skip": false, "sharded_ddp": [], "deepspeed": null, "label_smoothing_factor": 0.0, "adafactor": false, "group_by_length": false, "length_column_name": "length", "report_to": [], "ddp_find_unused_parameters": null, "dataloader_pin_memory": true, "skip_memory_metrics": true, "use_legacy_prediction_loop": false, "push_to_hub": false, "resume_from_checkpoint": null, "push_to_hub_model_id": "20211029_101219", "push_to_hub_organization": null, "push_to_hub_token": null, "_n_gpu": 1, "mp_parameters": "", "eval_file": "/nas/home/sh0416/data/"}
20211030_161510/data_args.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"data_type": "simcse-nli", "train_file": "/nas/home/sh0416/data/simcse/nli_for_simcse.csv", "max_seq_length": 32, "add_typo_corpus": false, "typo_corpus_filepath": null, "dup_rate": 0.08}
20211030_161510/model_args.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name_or_path": "bert-base-uncased", "loss_type": "simcse-avg", "temp": 0.05, "hidden_dropout_prob": 0.1, "mlp_only_train": true, "coeff_mlm": 0.1, "loss_rwmd": false, "layer_idx": 12}
20211030_161510/training_args.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/home/sh0416/checkpoints/20211030_161510", "overwrite_output_dir": false, "do_train": true, "do_eval": true, "do_predict": false, "evaluation_strategy": "steps", "prediction_loss_only": false, "per_device_train_batch_size": 128, "per_device_eval_batch_size": 128, "per_gpu_train_batch_size": null, "per_gpu_eval_batch_size": null, "gradient_accumulation_steps": 1, "eval_accumulation_steps": null, "learning_rate": 4e-05, "weight_decay": 0.0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_epsilon": 1e-08, "max_grad_norm": 1.0, "num_train_epochs": 3.0, "max_steps": -1, "lr_scheduler_type": "linear", "warmup_ratio": 0.0, "warmup_steps": 0, "log_level": -1, "log_level_replica": -1, "log_on_each_node": true, "logging_dir": "/home/sh0416/checkpoints/20211030_161510/runs/Oct30_16-15-10_jarvis", "logging_strategy": "steps", "logging_first_step": false, "logging_steps": 500, "save_strategy": "steps", "save_steps": 500, "save_total_limit": 1, "save_on_each_node": false, "no_cuda": false, "seed": 3, "fp16": true, "fp16_opt_level": "O1", "fp16_backend": "auto", "fp16_full_eval": false, "local_rank": -1, "tpu_num_cores": null, "tpu_metrics_debug": false, "debug": [], "dataloader_drop_last": true, "eval_steps": 250, "dataloader_num_workers": 0, "past_index": -1, "run_name": "/home/sh0416/checkpoints/20211030_161510", "disable_tqdm": false, "remove_unused_columns": true, "label_names": null, "load_best_model_at_end": true, "metric_for_best_model": "stsb_spearman", "greater_is_better": true, "ignore_data_skip": false, "sharded_ddp": [], "deepspeed": null, "label_smoothing_factor": 0.0, "adafactor": false, "group_by_length": false, "length_column_name": "length", "report_to": [], "ddp_find_unused_parameters": null, "dataloader_pin_memory": true, "skip_memory_metrics": true, "use_legacy_prediction_loop": false, "push_to_hub": false, "resume_from_checkpoint": null, "push_to_hub_model_id": "20211030_161510", "push_to_hub_organization": null, "push_to_hub_token": null, "_n_gpu": 1, "mp_parameters": "", "eval_file": "/nas/home/sh0416/data/"}
20211030_161612/answers-students.wa ADDED
The diff for this file is too large to render. See raw diff
 
20211030_161612/answers-students.wa.untrained ADDED
The diff for this file is too large to render. See raw diff
 
20211030_161612/checkpoint-2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55d092fb309a1dbce28c043a5044613fb9e3ad5643787356445f6ea4a27813ce
3
+ size 875973285
20211030_161612/checkpoint-2000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be04aec488d580c8c1134e1602916cd4240244d7c4bf842f50e77028e62df995
3
+ size 440387309
20211030_161612/checkpoint-2000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2f52cc3028827f4c99031990038e86f715b12cdf8538aabc1bac0dee99261b0
3
+ size 14503
20211030_161612/checkpoint-2000/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f23db1abf0b77198c6e3a9212a16fca37cbfe2158135986751bf408c3cec5d63
3
+ size 559
20211030_161612/checkpoint-2000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98c347fc75a377cadf6d4202c418493496db9df9270b4ad415666145a83227b0
3
+ size 623
20211030_161612/checkpoint-2000/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
20211030_161612/checkpoint-2000/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "do_basic_tokenize": true, "never_split": null, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "tokenizer_file": "/home/sh0416/.cache/huggingface/transformers/534479488c54aeaf9c3406f647aa2ec13648c06771ffe269edabebd4c412da1d.7f2721073f19841be16f41b0a70b600ca6b880c8f3df6f3535cbc704371bdfa4", "name_or_path": "bert-base-uncased", "tokenizer_class": "BertTokenizer"}
20211030_161612/checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8625848570907281,
3
+ "best_model_checkpoint": "/home/sh0416/checkpoints/20211030_161612/checkpoint-2000",
4
+ "epoch": 0.9289363678588016,
5
+ "global_step": 2000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.12,
12
+ "eval_stsb_spearman": 0.849104410089273,
13
+ "step": 250
14
+ },
15
+ {
16
+ "epoch": 0.23,
17
+ "learning_rate": 3.6922124167827844e-05,
18
+ "loss": 0.6558,
19
+ "step": 500
20
+ },
21
+ {
22
+ "epoch": 0.23,
23
+ "eval_stsb_spearman": 0.8549645102083819,
24
+ "step": 500
25
+ },
26
+ {
27
+ "epoch": 0.35,
28
+ "eval_stsb_spearman": 0.8493755409241399,
29
+ "step": 750
30
+ },
31
+ {
32
+ "epoch": 0.46,
33
+ "learning_rate": 3.38256696082985e-05,
34
+ "loss": 0.5061,
35
+ "step": 1000
36
+ },
37
+ {
38
+ "epoch": 0.46,
39
+ "eval_stsb_spearman": 0.8558743382367817,
40
+ "step": 1000
41
+ },
42
+ {
43
+ "epoch": 0.58,
44
+ "eval_stsb_spearman": 0.8579781749671418,
45
+ "step": 1250
46
+ },
47
+ {
48
+ "epoch": 0.7,
49
+ "learning_rate": 3.072921504876916e-05,
50
+ "loss": 0.4618,
51
+ "step": 1500
52
+ },
53
+ {
54
+ "epoch": 0.7,
55
+ "eval_stsb_spearman": 0.8551950210432068,
56
+ "step": 1500
57
+ },
58
+ {
59
+ "epoch": 0.81,
60
+ "eval_stsb_spearman": 0.8575288731834817,
61
+ "step": 1750
62
+ },
63
+ {
64
+ "epoch": 0.93,
65
+ "learning_rate": 2.763276048923982e-05,
66
+ "loss": 0.4405,
67
+ "step": 2000
68
+ },
69
+ {
70
+ "epoch": 0.93,
71
+ "eval_stsb_spearman": 0.8625848570907281,
72
+ "step": 2000
73
+ }
74
+ ],
75
+ "max_steps": 6459,
76
+ "num_train_epochs": 3,
77
+ "total_flos": 0.0,
78
+ "trial_name": null,
79
+ "trial_params": null
80
+ }
20211030_161612/checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa120f507d239c6af97ffb94ef4194b95342c4b09bbeb6681efc143dc43f8c6f
3
+ size 2735
20211030_161612/checkpoint-2000/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
20211030_161612/checkpoint-6250/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78b4e784b193c65a0c3bffd4faef2628130b7ee68800d57a7066b4ca689645cd
3
+ size 875973285
20211030_161612/checkpoint-6250/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da71a645f28290edf4434b9c7cd1a8235d702f18dc8c9eaf3c521f87a127da30
3
+ size 440387309
20211030_161612/checkpoint-6250/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b428c2f579b4b0f614f175c697132b1e59f00400df624235d7967af5f7538b9
3
+ size 14503
20211030_161612/checkpoint-6250/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53a7cbf97da840f6ef5b40dda02ec9e029260fa5b4ee7e5b95075489b65e35e9
3
+ size 559