dzungpham commited on
Commit
f81d183
·
verified ·
1 Parent(s): 8cafee4

upload graphcodebert robust, best f1 score at 0.54 at robust checkpoint 200

Browse files
Files changed (38) hide show
  1. graphcodebert-robust/checkpoint-1000/config.json +28 -0
  2. graphcodebert-robust/checkpoint-1000/merges.txt +0 -0
  3. graphcodebert-robust/checkpoint-1000/model.safetensors +3 -0
  4. graphcodebert-robust/checkpoint-1000/optimizer.pt +3 -0
  5. graphcodebert-robust/checkpoint-1000/rng_state.pth +3 -0
  6. graphcodebert-robust/checkpoint-1000/scaler.pt +3 -0
  7. graphcodebert-robust/checkpoint-1000/scheduler.pt +3 -0
  8. graphcodebert-robust/checkpoint-1000/special_tokens_map.json +51 -0
  9. graphcodebert-robust/checkpoint-1000/tokenizer.json +0 -0
  10. graphcodebert-robust/checkpoint-1000/tokenizer_config.json +58 -0
  11. graphcodebert-robust/checkpoint-1000/trainer_state.json +753 -0
  12. graphcodebert-robust/checkpoint-1000/training_args.bin +3 -0
  13. graphcodebert-robust/checkpoint-1000/vocab.json +0 -0
  14. graphcodebert-robust/checkpoint-400/model.safetensors +1 -1
  15. graphcodebert-robust/checkpoint-400/optimizer.pt +1 -1
  16. graphcodebert-robust/checkpoint-400/rng_state.pth +2 -2
  17. graphcodebert-robust/checkpoint-400/scaler.pt +1 -1
  18. graphcodebert-robust/checkpoint-400/scheduler.pt +1 -1
  19. graphcodebert-robust/checkpoint-400/tokenizer.json +1 -6
  20. graphcodebert-robust/checkpoint-400/trainer_state.json +63 -63
  21. graphcodebert-robust/checkpoint-400/training_args.bin +1 -1
  22. graphcodebert-robust/checkpoint-600/model.safetensors +1 -1
  23. graphcodebert-robust/checkpoint-600/optimizer.pt +1 -1
  24. graphcodebert-robust/checkpoint-600/rng_state.pth +2 -2
  25. graphcodebert-robust/checkpoint-600/scaler.pt +1 -1
  26. graphcodebert-robust/checkpoint-600/scheduler.pt +1 -1
  27. graphcodebert-robust/checkpoint-600/tokenizer.json +1 -6
  28. graphcodebert-robust/checkpoint-600/trainer_state.json +123 -123
  29. graphcodebert-robust/checkpoint-600/training_args.bin +1 -1
  30. graphcodebert-robust/checkpoint-800/model.safetensors +1 -1
  31. graphcodebert-robust/checkpoint-800/optimizer.pt +1 -1
  32. graphcodebert-robust/checkpoint-800/rng_state.pth +2 -2
  33. graphcodebert-robust/checkpoint-800/scaler.pt +1 -1
  34. graphcodebert-robust/checkpoint-800/scheduler.pt +1 -1
  35. graphcodebert-robust/checkpoint-800/tokenizer.json +1 -6
  36. graphcodebert-robust/checkpoint-800/trainer_state.json +183 -183
  37. graphcodebert-robust/checkpoint-800/training_args.bin +1 -1
  38. graphcodebert-robust/training.log +16 -16
graphcodebert-robust/checkpoint-1000/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "RobertaForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.2,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": 2,
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.2,
13
+ "hidden_size": 768,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_norm_eps": 1e-05,
17
+ "max_position_embeddings": 514,
18
+ "model_type": "roberta",
19
+ "num_attention_heads": 12,
20
+ "num_hidden_layers": 12,
21
+ "output_past": true,
22
+ "pad_token_id": 1,
23
+ "position_embedding_type": "absolute",
24
+ "transformers_version": "4.56.0",
25
+ "type_vocab_size": 1,
26
+ "use_cache": true,
27
+ "vocab_size": 50265
28
+ }
graphcodebert-robust/checkpoint-1000/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
graphcodebert-robust/checkpoint-1000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1775ae2be975414f5d11bf15988d0aec2d616f5a0808295653aacc889c6cfc71
3
+ size 498612824
graphcodebert-robust/checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e2bac1ac68b08957d3cdb3fda4647a858725c580b3d284372c167bdacd54196
3
+ size 4741923
graphcodebert-robust/checkpoint-1000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0f22ce42d575ecb5c503c0a6f1ea1c31f0d2f31df8668facc18e860c7d106ec
3
+ size 14581
graphcodebert-robust/checkpoint-1000/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:124625e167eb28acbfc793cfcb3e8a08b32e7fea06501462bc9e420a5e1beb2a
3
+ size 1383
graphcodebert-robust/checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf09df71d295b44fbffcc7f812f5e2732486c17d57994a3f0f366c7c7a6b5b97
3
+ size 1465
graphcodebert-robust/checkpoint-1000/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
graphcodebert-robust/checkpoint-1000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
graphcodebert-robust/checkpoint-1000/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50264": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": true,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": false,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "extra_special_tokens": {},
51
+ "mask_token": "<mask>",
52
+ "model_max_length": 512,
53
+ "pad_token": "<pad>",
54
+ "sep_token": "</s>",
55
+ "tokenizer_class": "RobertaTokenizer",
56
+ "trim_offsets": true,
57
+ "unk_token": "<unk>"
58
+ }
graphcodebert-robust/checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,753 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1000,
3
+ "best_metric": 0.7549859375827388,
4
+ "best_model_checkpoint": "./output_checkpoints/graphcodebert-robust/checkpoint-1000",
5
+ "epoch": 0.064,
6
+ "eval_steps": 1000,
7
+ "global_step": 1000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.00064,
14
+ "grad_norm": 1.6144306659698486,
15
+ "learning_rate": 1.1520000000000002e-08,
16
+ "loss": 0.729,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.00128,
21
+ "grad_norm": 2.0952296257019043,
22
+ "learning_rate": 2.4320000000000002e-08,
23
+ "loss": 0.7295,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.00192,
28
+ "grad_norm": 1.3587689399719238,
29
+ "learning_rate": 3.7120000000000004e-08,
30
+ "loss": 0.73,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.00256,
35
+ "grad_norm": 1.2531732320785522,
36
+ "learning_rate": 4.9920000000000006e-08,
37
+ "loss": 0.7221,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.0032,
42
+ "grad_norm": 1.437932014465332,
43
+ "learning_rate": 6.272000000000001e-08,
44
+ "loss": 0.7209,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.00384,
49
+ "grad_norm": 1.418426752090454,
50
+ "learning_rate": 7.552e-08,
51
+ "loss": 0.729,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.00448,
56
+ "grad_norm": 1.9476298093795776,
57
+ "learning_rate": 8.832e-08,
58
+ "loss": 0.7242,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.00512,
63
+ "grad_norm": 1.7948051691055298,
64
+ "learning_rate": 1.0112000000000001e-07,
65
+ "loss": 0.7227,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 0.00576,
70
+ "grad_norm": 1.6534360647201538,
71
+ "learning_rate": 1.1392e-07,
72
+ "loss": 0.7234,
73
+ "step": 90
74
+ },
75
+ {
76
+ "epoch": 0.0064,
77
+ "grad_norm": 1.0920158624649048,
78
+ "learning_rate": 1.2672e-07,
79
+ "loss": 0.7328,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 0.00704,
84
+ "grad_norm": 1.977837085723877,
85
+ "learning_rate": 1.3952000000000002e-07,
86
+ "loss": 0.7263,
87
+ "step": 110
88
+ },
89
+ {
90
+ "epoch": 0.00768,
91
+ "grad_norm": 1.388983130455017,
92
+ "learning_rate": 1.5232000000000003e-07,
93
+ "loss": 0.7286,
94
+ "step": 120
95
+ },
96
+ {
97
+ "epoch": 0.00832,
98
+ "grad_norm": 1.2956682443618774,
99
+ "learning_rate": 1.6512e-07,
100
+ "loss": 0.7251,
101
+ "step": 130
102
+ },
103
+ {
104
+ "epoch": 0.00896,
105
+ "grad_norm": 1.8125052452087402,
106
+ "learning_rate": 1.7792e-07,
107
+ "loss": 0.7251,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 0.0096,
112
+ "grad_norm": 1.626846194267273,
113
+ "learning_rate": 1.9072e-07,
114
+ "loss": 0.727,
115
+ "step": 150
116
+ },
117
+ {
118
+ "epoch": 0.01024,
119
+ "grad_norm": 2.3243086338043213,
120
+ "learning_rate": 2.0352e-07,
121
+ "loss": 0.726,
122
+ "step": 160
123
+ },
124
+ {
125
+ "epoch": 0.01088,
126
+ "grad_norm": 1.4734737873077393,
127
+ "learning_rate": 2.1632e-07,
128
+ "loss": 0.7252,
129
+ "step": 170
130
+ },
131
+ {
132
+ "epoch": 0.01152,
133
+ "grad_norm": 2.090498685836792,
134
+ "learning_rate": 2.2912e-07,
135
+ "loss": 0.7273,
136
+ "step": 180
137
+ },
138
+ {
139
+ "epoch": 0.01216,
140
+ "grad_norm": 1.7563093900680542,
141
+ "learning_rate": 2.4192000000000004e-07,
142
+ "loss": 0.719,
143
+ "step": 190
144
+ },
145
+ {
146
+ "epoch": 0.0128,
147
+ "grad_norm": 1.449843168258667,
148
+ "learning_rate": 2.5472000000000005e-07,
149
+ "loss": 0.7237,
150
+ "step": 200
151
+ },
152
+ {
153
+ "epoch": 0.01344,
154
+ "grad_norm": 141396.296875,
155
+ "learning_rate": 5.350742447516642e-07,
156
+ "loss": 0.7217,
157
+ "step": 210
158
+ },
159
+ {
160
+ "epoch": 0.01408,
161
+ "grad_norm": 102339.1640625,
162
+ "learning_rate": 5.606758832565284e-07,
163
+ "loss": 0.7215,
164
+ "step": 220
165
+ },
166
+ {
167
+ "epoch": 0.01472,
168
+ "grad_norm": 134052.9375,
169
+ "learning_rate": 5.862775217613928e-07,
170
+ "loss": 0.7115,
171
+ "step": 230
172
+ },
173
+ {
174
+ "epoch": 0.01536,
175
+ "grad_norm": 87181.984375,
176
+ "learning_rate": 6.118791602662571e-07,
177
+ "loss": 0.7241,
178
+ "step": 240
179
+ },
180
+ {
181
+ "epoch": 0.016,
182
+ "grad_norm": 100231.328125,
183
+ "learning_rate": 6.374807987711214e-07,
184
+ "loss": 0.71,
185
+ "step": 250
186
+ },
187
+ {
188
+ "epoch": 0.01664,
189
+ "grad_norm": 136721.484375,
190
+ "learning_rate": 6.630824372759858e-07,
191
+ "loss": 0.7188,
192
+ "step": 260
193
+ },
194
+ {
195
+ "epoch": 0.01728,
196
+ "grad_norm": 115868.8125,
197
+ "learning_rate": 6.8868407578085e-07,
198
+ "loss": 0.7199,
199
+ "step": 270
200
+ },
201
+ {
202
+ "epoch": 0.01792,
203
+ "grad_norm": 70205.1484375,
204
+ "learning_rate": 7.142857142857143e-07,
205
+ "loss": 0.7299,
206
+ "step": 280
207
+ },
208
+ {
209
+ "epoch": 0.01856,
210
+ "grad_norm": 98926.4453125,
211
+ "learning_rate": 7.398873527905787e-07,
212
+ "loss": 0.7159,
213
+ "step": 290
214
+ },
215
+ {
216
+ "epoch": 0.0192,
217
+ "grad_norm": 134108.140625,
218
+ "learning_rate": 7.65488991295443e-07,
219
+ "loss": 0.7122,
220
+ "step": 300
221
+ },
222
+ {
223
+ "epoch": 0.01984,
224
+ "grad_norm": 103719.140625,
225
+ "learning_rate": 7.910906298003073e-07,
226
+ "loss": 0.7185,
227
+ "step": 310
228
+ },
229
+ {
230
+ "epoch": 0.02048,
231
+ "grad_norm": 85624.953125,
232
+ "learning_rate": 8.166922683051716e-07,
233
+ "loss": 0.718,
234
+ "step": 320
235
+ },
236
+ {
237
+ "epoch": 0.02112,
238
+ "grad_norm": 138824.15625,
239
+ "learning_rate": 8.422939068100359e-07,
240
+ "loss": 0.713,
241
+ "step": 330
242
+ },
243
+ {
244
+ "epoch": 0.02176,
245
+ "grad_norm": 73629.0859375,
246
+ "learning_rate": 8.678955453149002e-07,
247
+ "loss": 0.7186,
248
+ "step": 340
249
+ },
250
+ {
251
+ "epoch": 0.0224,
252
+ "grad_norm": 132493.0,
253
+ "learning_rate": 8.934971838197646e-07,
254
+ "loss": 0.7133,
255
+ "step": 350
256
+ },
257
+ {
258
+ "epoch": 0.02304,
259
+ "grad_norm": 85223.625,
260
+ "learning_rate": 9.190988223246289e-07,
261
+ "loss": 0.7124,
262
+ "step": 360
263
+ },
264
+ {
265
+ "epoch": 0.02368,
266
+ "grad_norm": 77868.78125,
267
+ "learning_rate": 9.447004608294931e-07,
268
+ "loss": 0.7058,
269
+ "step": 370
270
+ },
271
+ {
272
+ "epoch": 0.02432,
273
+ "grad_norm": 75874.3046875,
274
+ "learning_rate": 9.703020993343575e-07,
275
+ "loss": 0.7139,
276
+ "step": 380
277
+ },
278
+ {
279
+ "epoch": 0.02496,
280
+ "grad_norm": 151937.703125,
281
+ "learning_rate": 9.959037378392218e-07,
282
+ "loss": 0.713,
283
+ "step": 390
284
+ },
285
+ {
286
+ "epoch": 0.0256,
287
+ "grad_norm": 161711.671875,
288
+ "learning_rate": 1.021505376344086e-06,
289
+ "loss": 0.7137,
290
+ "step": 400
291
+ },
292
+ {
293
+ "epoch": 0.02624,
294
+ "grad_norm": 90800.234375,
295
+ "learning_rate": 1.0471070148489503e-06,
296
+ "loss": 0.7091,
297
+ "step": 410
298
+ },
299
+ {
300
+ "epoch": 0.02688,
301
+ "grad_norm": 82131.34375,
302
+ "learning_rate": 1.0727086533538148e-06,
303
+ "loss": 0.7098,
304
+ "step": 420
305
+ },
306
+ {
307
+ "epoch": 0.02752,
308
+ "grad_norm": 92818.9140625,
309
+ "learning_rate": 1.0983102918586791e-06,
310
+ "loss": 0.7099,
311
+ "step": 430
312
+ },
313
+ {
314
+ "epoch": 0.02816,
315
+ "grad_norm": 88555.5078125,
316
+ "learning_rate": 1.1239119303635434e-06,
317
+ "loss": 0.7086,
318
+ "step": 440
319
+ },
320
+ {
321
+ "epoch": 0.0288,
322
+ "grad_norm": 73428.6015625,
323
+ "learning_rate": 1.1495135688684077e-06,
324
+ "loss": 0.7117,
325
+ "step": 450
326
+ },
327
+ {
328
+ "epoch": 0.02944,
329
+ "grad_norm": 128938.7421875,
330
+ "learning_rate": 1.175115207373272e-06,
331
+ "loss": 0.7182,
332
+ "step": 460
333
+ },
334
+ {
335
+ "epoch": 0.03008,
336
+ "grad_norm": 102742.3359375,
337
+ "learning_rate": 1.2007168458781362e-06,
338
+ "loss": 0.7108,
339
+ "step": 470
340
+ },
341
+ {
342
+ "epoch": 0.03072,
343
+ "grad_norm": 73825.8125,
344
+ "learning_rate": 1.2263184843830007e-06,
345
+ "loss": 0.7087,
346
+ "step": 480
347
+ },
348
+ {
349
+ "epoch": 0.03136,
350
+ "grad_norm": 110930.75,
351
+ "learning_rate": 1.251920122887865e-06,
352
+ "loss": 0.7232,
353
+ "step": 490
354
+ },
355
+ {
356
+ "epoch": 0.032,
357
+ "grad_norm": 95068.84375,
358
+ "learning_rate": 1.2775217613927293e-06,
359
+ "loss": 0.703,
360
+ "step": 500
361
+ },
362
+ {
363
+ "epoch": 0.03264,
364
+ "grad_norm": 118731.9296875,
365
+ "learning_rate": 1.3031233998975938e-06,
366
+ "loss": 0.7063,
367
+ "step": 510
368
+ },
369
+ {
370
+ "epoch": 0.03328,
371
+ "grad_norm": 80511.828125,
372
+ "learning_rate": 1.3287250384024578e-06,
373
+ "loss": 0.7143,
374
+ "step": 520
375
+ },
376
+ {
377
+ "epoch": 0.03392,
378
+ "grad_norm": 84864.484375,
379
+ "learning_rate": 1.354326676907322e-06,
380
+ "loss": 0.7055,
381
+ "step": 530
382
+ },
383
+ {
384
+ "epoch": 0.03456,
385
+ "grad_norm": 107800.109375,
386
+ "learning_rate": 1.3799283154121864e-06,
387
+ "loss": 0.7119,
388
+ "step": 540
389
+ },
390
+ {
391
+ "epoch": 0.0352,
392
+ "grad_norm": 83667.671875,
393
+ "learning_rate": 1.4055299539170509e-06,
394
+ "loss": 0.7082,
395
+ "step": 550
396
+ },
397
+ {
398
+ "epoch": 0.03584,
399
+ "grad_norm": 75656.4140625,
400
+ "learning_rate": 1.4311315924219151e-06,
401
+ "loss": 0.7062,
402
+ "step": 560
403
+ },
404
+ {
405
+ "epoch": 0.03648,
406
+ "grad_norm": 79985.875,
407
+ "learning_rate": 1.4567332309267796e-06,
408
+ "loss": 0.7155,
409
+ "step": 570
410
+ },
411
+ {
412
+ "epoch": 0.03712,
413
+ "grad_norm": 76334.078125,
414
+ "learning_rate": 1.4823348694316437e-06,
415
+ "loss": 0.7075,
416
+ "step": 580
417
+ },
418
+ {
419
+ "epoch": 0.03776,
420
+ "grad_norm": 140764.03125,
421
+ "learning_rate": 1.507936507936508e-06,
422
+ "loss": 0.7065,
423
+ "step": 590
424
+ },
425
+ {
426
+ "epoch": 0.0384,
427
+ "grad_norm": 100877.296875,
428
+ "learning_rate": 1.5335381464413722e-06,
429
+ "loss": 0.7096,
430
+ "step": 600
431
+ },
432
+ {
433
+ "epoch": 0.03904,
434
+ "grad_norm": 104088.1171875,
435
+ "learning_rate": 1.5591397849462367e-06,
436
+ "loss": 0.6987,
437
+ "step": 610
438
+ },
439
+ {
440
+ "epoch": 0.03968,
441
+ "grad_norm": 80806.2265625,
442
+ "learning_rate": 1.584741423451101e-06,
443
+ "loss": 0.707,
444
+ "step": 620
445
+ },
446
+ {
447
+ "epoch": 0.04032,
448
+ "grad_norm": 109884.765625,
449
+ "learning_rate": 1.6103430619559655e-06,
450
+ "loss": 0.6991,
451
+ "step": 630
452
+ },
453
+ {
454
+ "epoch": 0.04096,
455
+ "grad_norm": 79944.890625,
456
+ "learning_rate": 1.6359447004608298e-06,
457
+ "loss": 0.7047,
458
+ "step": 640
459
+ },
460
+ {
461
+ "epoch": 0.0416,
462
+ "grad_norm": 93673.3828125,
463
+ "learning_rate": 1.6615463389656938e-06,
464
+ "loss": 0.6971,
465
+ "step": 650
466
+ },
467
+ {
468
+ "epoch": 0.04224,
469
+ "grad_norm": 76641.265625,
470
+ "learning_rate": 1.6871479774705581e-06,
471
+ "loss": 0.6957,
472
+ "step": 660
473
+ },
474
+ {
475
+ "epoch": 0.04288,
476
+ "grad_norm": 73583.5546875,
477
+ "learning_rate": 1.7127496159754226e-06,
478
+ "loss": 0.7028,
479
+ "step": 670
480
+ },
481
+ {
482
+ "epoch": 0.04352,
483
+ "grad_norm": 75177.9609375,
484
+ "learning_rate": 1.7383512544802869e-06,
485
+ "loss": 0.7012,
486
+ "step": 680
487
+ },
488
+ {
489
+ "epoch": 0.04416,
490
+ "grad_norm": 78340.8515625,
491
+ "learning_rate": 1.7639528929851512e-06,
492
+ "loss": 0.6987,
493
+ "step": 690
494
+ },
495
+ {
496
+ "epoch": 0.0448,
497
+ "grad_norm": 86004.1171875,
498
+ "learning_rate": 1.7895545314900157e-06,
499
+ "loss": 0.7061,
500
+ "step": 700
501
+ },
502
+ {
503
+ "epoch": 0.04544,
504
+ "grad_norm": 94212.0390625,
505
+ "learning_rate": 1.8151561699948797e-06,
506
+ "loss": 0.6993,
507
+ "step": 710
508
+ },
509
+ {
510
+ "epoch": 0.04608,
511
+ "grad_norm": 83918.2421875,
512
+ "learning_rate": 1.840757808499744e-06,
513
+ "loss": 0.7009,
514
+ "step": 720
515
+ },
516
+ {
517
+ "epoch": 0.04672,
518
+ "grad_norm": 68374.3125,
519
+ "learning_rate": 1.8663594470046085e-06,
520
+ "loss": 0.6964,
521
+ "step": 730
522
+ },
523
+ {
524
+ "epoch": 0.04736,
525
+ "grad_norm": 90348.78125,
526
+ "learning_rate": 1.8919610855094728e-06,
527
+ "loss": 0.7011,
528
+ "step": 740
529
+ },
530
+ {
531
+ "epoch": 0.048,
532
+ "grad_norm": 146658.0,
533
+ "learning_rate": 1.9175627240143373e-06,
534
+ "loss": 0.7003,
535
+ "step": 750
536
+ },
537
+ {
538
+ "epoch": 0.04864,
539
+ "grad_norm": 112037.1640625,
540
+ "learning_rate": 1.9431643625192015e-06,
541
+ "loss": 0.7051,
542
+ "step": 760
543
+ },
544
+ {
545
+ "epoch": 0.04928,
546
+ "grad_norm": 70628.625,
547
+ "learning_rate": 1.9687660010240654e-06,
548
+ "loss": 0.6923,
549
+ "step": 770
550
+ },
551
+ {
552
+ "epoch": 0.04992,
553
+ "grad_norm": 109922.125,
554
+ "learning_rate": 1.99436763952893e-06,
555
+ "loss": 0.6893,
556
+ "step": 780
557
+ },
558
+ {
559
+ "epoch": 0.05056,
560
+ "grad_norm": 135306.375,
561
+ "learning_rate": 2.0199692780337944e-06,
562
+ "loss": 0.7008,
563
+ "step": 790
564
+ },
565
+ {
566
+ "epoch": 0.0512,
567
+ "grad_norm": 82354.8046875,
568
+ "learning_rate": 2.0455709165386586e-06,
569
+ "loss": 0.705,
570
+ "step": 800
571
+ },
572
+ {
573
+ "epoch": 0.05184,
574
+ "grad_norm": 95951.671875,
575
+ "learning_rate": 2.071172555043523e-06,
576
+ "loss": 0.6912,
577
+ "step": 810
578
+ },
579
+ {
580
+ "epoch": 0.05248,
581
+ "grad_norm": 96797.4609375,
582
+ "learning_rate": 2.096774193548387e-06,
583
+ "loss": 0.6922,
584
+ "step": 820
585
+ },
586
+ {
587
+ "epoch": 0.05312,
588
+ "grad_norm": 87190.625,
589
+ "learning_rate": 2.122375832053252e-06,
590
+ "loss": 0.6946,
591
+ "step": 830
592
+ },
593
+ {
594
+ "epoch": 0.05376,
595
+ "grad_norm": 87958.5625,
596
+ "learning_rate": 2.1479774705581158e-06,
597
+ "loss": 0.6949,
598
+ "step": 840
599
+ },
600
+ {
601
+ "epoch": 0.0544,
602
+ "grad_norm": 77217.1796875,
603
+ "learning_rate": 2.17357910906298e-06,
604
+ "loss": 0.6928,
605
+ "step": 850
606
+ },
607
+ {
608
+ "epoch": 0.05504,
609
+ "grad_norm": 117156.5546875,
610
+ "learning_rate": 2.1991807475678443e-06,
611
+ "loss": 0.692,
612
+ "step": 860
613
+ },
614
+ {
615
+ "epoch": 0.05568,
616
+ "grad_norm": 94618.6875,
617
+ "learning_rate": 2.224782386072709e-06,
618
+ "loss": 0.6976,
619
+ "step": 870
620
+ },
621
+ {
622
+ "epoch": 0.05632,
623
+ "grad_norm": 71444.6484375,
624
+ "learning_rate": 2.2503840245775733e-06,
625
+ "loss": 0.6989,
626
+ "step": 880
627
+ },
628
+ {
629
+ "epoch": 0.05696,
630
+ "grad_norm": 159991.609375,
631
+ "learning_rate": 2.2759856630824376e-06,
632
+ "loss": 0.6928,
633
+ "step": 890
634
+ },
635
+ {
636
+ "epoch": 0.0576,
637
+ "grad_norm": 81899.6875,
638
+ "learning_rate": 2.301587301587302e-06,
639
+ "loss": 0.691,
640
+ "step": 900
641
+ },
642
+ {
643
+ "epoch": 0.05824,
644
+ "grad_norm": 110817.3671875,
645
+ "learning_rate": 2.327188940092166e-06,
646
+ "loss": 0.6858,
647
+ "step": 910
648
+ },
649
+ {
650
+ "epoch": 0.05888,
651
+ "grad_norm": 105698.109375,
652
+ "learning_rate": 2.3527905785970304e-06,
653
+ "loss": 0.6965,
654
+ "step": 920
655
+ },
656
+ {
657
+ "epoch": 0.05952,
658
+ "grad_norm": 76475.0,
659
+ "learning_rate": 2.3783922171018947e-06,
660
+ "loss": 0.6901,
661
+ "step": 930
662
+ },
663
+ {
664
+ "epoch": 0.06016,
665
+ "grad_norm": 96672.6796875,
666
+ "learning_rate": 2.403993855606759e-06,
667
+ "loss": 0.6908,
668
+ "step": 940
669
+ },
670
+ {
671
+ "epoch": 0.0608,
672
+ "grad_norm": 114510.8125,
673
+ "learning_rate": 2.4295954941116232e-06,
674
+ "loss": 0.6904,
675
+ "step": 950
676
+ },
677
+ {
678
+ "epoch": 0.06144,
679
+ "grad_norm": 62412.4375,
680
+ "learning_rate": 2.455197132616488e-06,
681
+ "loss": 0.6855,
682
+ "step": 960
683
+ },
684
+ {
685
+ "epoch": 0.06208,
686
+ "grad_norm": 92860.7109375,
687
+ "learning_rate": 2.4807987711213518e-06,
688
+ "loss": 0.6752,
689
+ "step": 970
690
+ },
691
+ {
692
+ "epoch": 0.06272,
693
+ "grad_norm": 75184.359375,
694
+ "learning_rate": 2.506400409626216e-06,
695
+ "loss": 0.6868,
696
+ "step": 980
697
+ },
698
+ {
699
+ "epoch": 0.06336,
700
+ "grad_norm": 77771.1640625,
701
+ "learning_rate": 2.5320020481310808e-06,
702
+ "loss": 0.6941,
703
+ "step": 990
704
+ },
705
+ {
706
+ "epoch": 0.064,
707
+ "grad_norm": 65366.796875,
708
+ "learning_rate": 2.557603686635945e-06,
709
+ "loss": 0.6808,
710
+ "step": 1000
711
+ },
712
+ {
713
+ "epoch": 0.064,
714
+ "eval_accuracy": 0.75744,
715
+ "eval_loss": 0.6539617776870728,
716
+ "eval_macro_f1": 0.7549859375827388,
717
+ "eval_runtime": 1576.6702,
718
+ "eval_samples_per_second": 63.425,
719
+ "eval_steps_per_second": 0.496,
720
+ "step": 1000
721
+ }
722
+ ],
723
+ "logging_steps": 10,
724
+ "max_steps": 78125,
725
+ "num_input_tokens_seen": 0,
726
+ "num_train_epochs": 5,
727
+ "save_steps": 200,
728
+ "stateful_callbacks": {
729
+ "EarlyStoppingCallback": {
730
+ "args": {
731
+ "early_stopping_patience": 3,
732
+ "early_stopping_threshold": 0.0
733
+ },
734
+ "attributes": {
735
+ "early_stopping_patience_counter": 0
736
+ }
737
+ },
738
+ "TrainerControl": {
739
+ "args": {
740
+ "should_epoch_stop": false,
741
+ "should_evaluate": false,
742
+ "should_log": false,
743
+ "should_save": true,
744
+ "should_training_stop": false
745
+ },
746
+ "attributes": {}
747
+ }
748
+ },
749
+ "total_flos": 8417004883171200.0,
750
+ "train_batch_size": 32,
751
+ "trial_name": null,
752
+ "trial_params": null
753
+ }
graphcodebert-robust/checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5fc4023adc281644437a690ea6a6001846d7762699cd428d4ad38e1888076db
3
+ size 5841
graphcodebert-robust/checkpoint-1000/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
graphcodebert-robust/checkpoint-400/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e4e748b25483175160cea7725c3f8f0878d2cab69bc662e854fb2f2191256cd
3
  size 498612824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a01766ea37053c4e1086db23a592ccd390b6f66d530273ae2dae69fbf9aa39e
3
  size 498612824
graphcodebert-robust/checkpoint-400/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f90982cf6a84b33871eaabef13bee4999efc870036a33418fc702291783e6ef
3
  size 4741923
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3993e14f8e5395da15ce3350b7a6c24a8b0c21921fd8cce7a29d5175f071b2fc
3
  size 4741923
graphcodebert-robust/checkpoint-400/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db236f43af2a1e18f8bd14b48ca1899a08ea03909f6a24acd7f544ce3ee66296
3
- size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ab60503702bb1354c5765d2c7d1ba9f47491e07ac8864941c7126246dccd968
3
+ size 14581
graphcodebert-robust/checkpoint-400/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30858f23bcb22d0baef45bd4add9d6fa474141308c12653c706077b87d932e49
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:124625e167eb28acbfc793cfcb3e8a08b32e7fea06501462bc9e420a5e1beb2a
3
  size 1383
graphcodebert-robust/checkpoint-400/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69fd2a2128ae1d3fdad80684195acce5b8c4cb843627b5e9241d19cba08e96e4
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99753ecc9725cb463a1acc03fa95671b59d366ed45a71854383d0a8e379a982d
3
  size 1465
graphcodebert-robust/checkpoint-400/tokenizer.json CHANGED
@@ -1,11 +1,6 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 512,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
  "padding": null,
10
  "added_tokens": [
11
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
graphcodebert-robust/checkpoint-400/trainer_state.json CHANGED
@@ -151,149 +151,149 @@
151
  },
152
  {
153
  "epoch": 0.01344,
154
- "grad_norm": 2.1326472759246826,
155
- "learning_rate": 2.6752000000000006e-07,
156
- "loss": 0.7305,
157
  "step": 210
158
  },
159
  {
160
  "epoch": 0.01408,
161
- "grad_norm": 2.21703839302063,
162
- "learning_rate": 2.8032e-07,
163
- "loss": 0.7167,
164
  "step": 220
165
  },
166
  {
167
  "epoch": 0.01472,
168
- "grad_norm": 1.6385700702667236,
169
- "learning_rate": 2.9312e-07,
170
- "loss": 0.7209,
171
  "step": 230
172
  },
173
  {
174
  "epoch": 0.01536,
175
- "grad_norm": 1.4293471574783325,
176
- "learning_rate": 3.0592000000000003e-07,
177
- "loss": 0.722,
178
  "step": 240
179
  },
180
  {
181
  "epoch": 0.016,
182
- "grad_norm": 2.1437904834747314,
183
- "learning_rate": 3.1872e-07,
184
- "loss": 0.717,
185
  "step": 250
186
  },
187
  {
188
  "epoch": 0.01664,
189
- "grad_norm": 2.014806032180786,
190
- "learning_rate": 3.3152000000000005e-07,
191
- "loss": 0.7182,
192
  "step": 260
193
  },
194
  {
195
  "epoch": 0.01728,
196
- "grad_norm": 1.7216386795043945,
197
- "learning_rate": 3.4432e-07,
198
- "loss": 0.7253,
199
  "step": 270
200
  },
201
  {
202
  "epoch": 0.01792,
203
- "grad_norm": 1.4267009496688843,
204
- "learning_rate": 3.5712e-07,
205
- "loss": 0.7189,
206
  "step": 280
207
  },
208
  {
209
  "epoch": 0.01856,
210
- "grad_norm": 2.222503185272217,
211
- "learning_rate": 3.6992e-07,
212
- "loss": 0.7198,
213
  "step": 290
214
  },
215
  {
216
  "epoch": 0.0192,
217
- "grad_norm": 1.578922986984253,
218
- "learning_rate": 3.8272000000000003e-07,
219
- "loss": 0.717,
220
  "step": 300
221
  },
222
  {
223
  "epoch": 0.01984,
224
- "grad_norm": 1.719905972480774,
225
- "learning_rate": 3.9552e-07,
226
- "loss": 0.709,
227
  "step": 310
228
  },
229
  {
230
  "epoch": 0.02048,
231
- "grad_norm": 1.4473963975906372,
232
- "learning_rate": 4.0832000000000005e-07,
233
- "loss": 0.7215,
234
  "step": 320
235
  },
236
  {
237
  "epoch": 0.02112,
238
- "grad_norm": 2.1639790534973145,
239
- "learning_rate": 4.2112e-07,
240
- "loss": 0.7175,
241
  "step": 330
242
  },
243
  {
244
  "epoch": 0.02176,
245
- "grad_norm": 1.2387958765029907,
246
- "learning_rate": 4.3392e-07,
247
- "loss": 0.7129,
248
  "step": 340
249
  },
250
  {
251
  "epoch": 0.0224,
252
- "grad_norm": 2.2797842025756836,
253
- "learning_rate": 4.4672000000000007e-07,
254
- "loss": 0.7159,
255
  "step": 350
256
  },
257
  {
258
  "epoch": 0.02304,
259
- "grad_norm": 1.5692473649978638,
260
- "learning_rate": 4.5952000000000003e-07,
261
- "loss": 0.7161,
262
  "step": 360
263
  },
264
  {
265
  "epoch": 0.02368,
266
- "grad_norm": 1.4270817041397095,
267
- "learning_rate": 4.723200000000001e-07,
268
- "loss": 0.7114,
269
  "step": 370
270
  },
271
  {
272
  "epoch": 0.02432,
273
- "grad_norm": 1.4091335535049438,
274
- "learning_rate": 4.8512e-07,
275
- "loss": 0.7127,
276
  "step": 380
277
  },
278
  {
279
  "epoch": 0.02496,
280
- "grad_norm": 1.8862844705581665,
281
- "learning_rate": 4.979200000000001e-07,
282
- "loss": 0.7153,
283
  "step": 390
284
  },
285
  {
286
  "epoch": 0.0256,
287
- "grad_norm": 1.9264376163482666,
288
- "learning_rate": 5.107200000000001e-07,
289
- "loss": 0.7109,
290
  "step": 400
291
  }
292
  ],
293
  "logging_steps": 10,
294
- "max_steps": 156250,
295
  "num_input_tokens_seen": 0,
296
- "num_train_epochs": 10,
297
  "save_steps": 200,
298
  "stateful_callbacks": {
299
  "EarlyStoppingCallback": {
@@ -316,7 +316,7 @@
316
  "attributes": {}
317
  }
318
  },
319
- "total_flos": 3367821508608000.0,
320
  "train_batch_size": 32,
321
  "trial_name": null,
322
  "trial_params": null
 
151
  },
152
  {
153
  "epoch": 0.01344,
154
+ "grad_norm": 141396.296875,
155
+ "learning_rate": 5.350742447516642e-07,
156
+ "loss": 0.7217,
157
  "step": 210
158
  },
159
  {
160
  "epoch": 0.01408,
161
+ "grad_norm": 102339.1640625,
162
+ "learning_rate": 5.606758832565284e-07,
163
+ "loss": 0.7215,
164
  "step": 220
165
  },
166
  {
167
  "epoch": 0.01472,
168
+ "grad_norm": 134052.9375,
169
+ "learning_rate": 5.862775217613928e-07,
170
+ "loss": 0.7115,
171
  "step": 230
172
  },
173
  {
174
  "epoch": 0.01536,
175
+ "grad_norm": 87181.984375,
176
+ "learning_rate": 6.118791602662571e-07,
177
+ "loss": 0.7241,
178
  "step": 240
179
  },
180
  {
181
  "epoch": 0.016,
182
+ "grad_norm": 100231.328125,
183
+ "learning_rate": 6.374807987711214e-07,
184
+ "loss": 0.71,
185
  "step": 250
186
  },
187
  {
188
  "epoch": 0.01664,
189
+ "grad_norm": 136721.484375,
190
+ "learning_rate": 6.630824372759858e-07,
191
+ "loss": 0.7188,
192
  "step": 260
193
  },
194
  {
195
  "epoch": 0.01728,
196
+ "grad_norm": 115868.8125,
197
+ "learning_rate": 6.8868407578085e-07,
198
+ "loss": 0.7199,
199
  "step": 270
200
  },
201
  {
202
  "epoch": 0.01792,
203
+ "grad_norm": 70205.1484375,
204
+ "learning_rate": 7.142857142857143e-07,
205
+ "loss": 0.7299,
206
  "step": 280
207
  },
208
  {
209
  "epoch": 0.01856,
210
+ "grad_norm": 98926.4453125,
211
+ "learning_rate": 7.398873527905787e-07,
212
+ "loss": 0.7159,
213
  "step": 290
214
  },
215
  {
216
  "epoch": 0.0192,
217
+ "grad_norm": 134108.140625,
218
+ "learning_rate": 7.65488991295443e-07,
219
+ "loss": 0.7122,
220
  "step": 300
221
  },
222
  {
223
  "epoch": 0.01984,
224
+ "grad_norm": 103719.140625,
225
+ "learning_rate": 7.910906298003073e-07,
226
+ "loss": 0.7185,
227
  "step": 310
228
  },
229
  {
230
  "epoch": 0.02048,
231
+ "grad_norm": 85624.953125,
232
+ "learning_rate": 8.166922683051716e-07,
233
+ "loss": 0.718,
234
  "step": 320
235
  },
236
  {
237
  "epoch": 0.02112,
238
+ "grad_norm": 138824.15625,
239
+ "learning_rate": 8.422939068100359e-07,
240
+ "loss": 0.713,
241
  "step": 330
242
  },
243
  {
244
  "epoch": 0.02176,
245
+ "grad_norm": 73629.0859375,
246
+ "learning_rate": 8.678955453149002e-07,
247
+ "loss": 0.7186,
248
  "step": 340
249
  },
250
  {
251
  "epoch": 0.0224,
252
+ "grad_norm": 132493.0,
253
+ "learning_rate": 8.934971838197646e-07,
254
+ "loss": 0.7133,
255
  "step": 350
256
  },
257
  {
258
  "epoch": 0.02304,
259
+ "grad_norm": 85223.625,
260
+ "learning_rate": 9.190988223246289e-07,
261
+ "loss": 0.7124,
262
  "step": 360
263
  },
264
  {
265
  "epoch": 0.02368,
266
+ "grad_norm": 77868.78125,
267
+ "learning_rate": 9.447004608294931e-07,
268
+ "loss": 0.7058,
269
  "step": 370
270
  },
271
  {
272
  "epoch": 0.02432,
273
+ "grad_norm": 75874.3046875,
274
+ "learning_rate": 9.703020993343575e-07,
275
+ "loss": 0.7139,
276
  "step": 380
277
  },
278
  {
279
  "epoch": 0.02496,
280
+ "grad_norm": 151937.703125,
281
+ "learning_rate": 9.959037378392218e-07,
282
+ "loss": 0.713,
283
  "step": 390
284
  },
285
  {
286
  "epoch": 0.0256,
287
+ "grad_norm": 161711.671875,
288
+ "learning_rate": 1.021505376344086e-06,
289
+ "loss": 0.7137,
290
  "step": 400
291
  }
292
  ],
293
  "logging_steps": 10,
294
+ "max_steps": 78125,
295
  "num_input_tokens_seen": 0,
296
+ "num_train_epochs": 5,
297
  "save_steps": 200,
298
  "stateful_callbacks": {
299
  "EarlyStoppingCallback": {
 
316
  "attributes": {}
317
  }
318
  },
319
+ "total_flos": 3367295286497280.0,
320
  "train_batch_size": 32,
321
  "trial_name": null,
322
  "trial_params": null
graphcodebert-robust/checkpoint-400/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ff659b85d84ec0bae53596bc271ba773db9c463626db0f13fd8e747f433dad4
3
  size 5841
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5fc4023adc281644437a690ea6a6001846d7762699cd428d4ad38e1888076db
3
  size 5841
graphcodebert-robust/checkpoint-600/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33d1e5063213f214a9f1effa3c1d7fdca40af6b0941bb37ae0f1a6239c90b3c4
3
  size 498612824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75ec427b92df30abfd117ca61bf8855a95bff5b8e2f300c83f23131aa83f89a3
3
  size 498612824
graphcodebert-robust/checkpoint-600/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:787767f32f92db97a85e79ae7369b941462a2aa040ad04230091e634625d1bd5
3
  size 4741923
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6eaf9c7a3d50e76cca47c4da094a2db7ca99a2b289f3509dc98882e9debad13
3
  size 4741923
graphcodebert-robust/checkpoint-600/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae074b64b04f15c65bed20fbc593949760914672b525152439949dbdeac14c41
3
- size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:820bebfae8bbd9452955c53efeeb042e6227f4bb5c733fac637c835bd717c752
3
+ size 14581
graphcodebert-robust/checkpoint-600/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb7fde5111803012042c93a73aa191336bb6e10b3ad44f6bd1d94fc7008a22b6
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:124625e167eb28acbfc793cfcb3e8a08b32e7fea06501462bc9e420a5e1beb2a
3
  size 1383
graphcodebert-robust/checkpoint-600/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae70cc056ae3330cc58f33660559174defa991e45f91baa83a3ceffabb8b19fd
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abc0eb96c2d3f04dd37bcd945b0c2a2b0de8956916d0c07353bb361443cea60c
3
  size 1465
graphcodebert-robust/checkpoint-600/tokenizer.json CHANGED
@@ -1,11 +1,6 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 512,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
  "padding": null,
10
  "added_tokens": [
11
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
graphcodebert-robust/checkpoint-600/trainer_state.json CHANGED
@@ -151,289 +151,289 @@
151
  },
152
  {
153
  "epoch": 0.01344,
154
- "grad_norm": 2.1326472759246826,
155
- "learning_rate": 2.6752000000000006e-07,
156
- "loss": 0.7305,
157
  "step": 210
158
  },
159
  {
160
  "epoch": 0.01408,
161
- "grad_norm": 2.21703839302063,
162
- "learning_rate": 2.8032e-07,
163
- "loss": 0.7167,
164
  "step": 220
165
  },
166
  {
167
  "epoch": 0.01472,
168
- "grad_norm": 1.6385700702667236,
169
- "learning_rate": 2.9312e-07,
170
- "loss": 0.7209,
171
  "step": 230
172
  },
173
  {
174
  "epoch": 0.01536,
175
- "grad_norm": 1.4293471574783325,
176
- "learning_rate": 3.0592000000000003e-07,
177
- "loss": 0.722,
178
  "step": 240
179
  },
180
  {
181
  "epoch": 0.016,
182
- "grad_norm": 2.1437904834747314,
183
- "learning_rate": 3.1872e-07,
184
- "loss": 0.717,
185
  "step": 250
186
  },
187
  {
188
  "epoch": 0.01664,
189
- "grad_norm": 2.014806032180786,
190
- "learning_rate": 3.3152000000000005e-07,
191
- "loss": 0.7182,
192
  "step": 260
193
  },
194
  {
195
  "epoch": 0.01728,
196
- "grad_norm": 1.7216386795043945,
197
- "learning_rate": 3.4432e-07,
198
- "loss": 0.7253,
199
  "step": 270
200
  },
201
  {
202
  "epoch": 0.01792,
203
- "grad_norm": 1.4267009496688843,
204
- "learning_rate": 3.5712e-07,
205
- "loss": 0.7189,
206
  "step": 280
207
  },
208
  {
209
  "epoch": 0.01856,
210
- "grad_norm": 2.222503185272217,
211
- "learning_rate": 3.6992e-07,
212
- "loss": 0.7198,
213
  "step": 290
214
  },
215
  {
216
  "epoch": 0.0192,
217
- "grad_norm": 1.578922986984253,
218
- "learning_rate": 3.8272000000000003e-07,
219
- "loss": 0.717,
220
  "step": 300
221
  },
222
  {
223
  "epoch": 0.01984,
224
- "grad_norm": 1.719905972480774,
225
- "learning_rate": 3.9552e-07,
226
- "loss": 0.709,
227
  "step": 310
228
  },
229
  {
230
  "epoch": 0.02048,
231
- "grad_norm": 1.4473963975906372,
232
- "learning_rate": 4.0832000000000005e-07,
233
- "loss": 0.7215,
234
  "step": 320
235
  },
236
  {
237
  "epoch": 0.02112,
238
- "grad_norm": 2.1639790534973145,
239
- "learning_rate": 4.2112e-07,
240
- "loss": 0.7175,
241
  "step": 330
242
  },
243
  {
244
  "epoch": 0.02176,
245
- "grad_norm": 1.2387958765029907,
246
- "learning_rate": 4.3392e-07,
247
- "loss": 0.7129,
248
  "step": 340
249
  },
250
  {
251
  "epoch": 0.0224,
252
- "grad_norm": 2.2797842025756836,
253
- "learning_rate": 4.4672000000000007e-07,
254
- "loss": 0.7159,
255
  "step": 350
256
  },
257
  {
258
  "epoch": 0.02304,
259
- "grad_norm": 1.5692473649978638,
260
- "learning_rate": 4.5952000000000003e-07,
261
- "loss": 0.7161,
262
  "step": 360
263
  },
264
  {
265
  "epoch": 0.02368,
266
- "grad_norm": 1.4270817041397095,
267
- "learning_rate": 4.723200000000001e-07,
268
- "loss": 0.7114,
269
  "step": 370
270
  },
271
  {
272
  "epoch": 0.02432,
273
- "grad_norm": 1.4091335535049438,
274
- "learning_rate": 4.8512e-07,
275
- "loss": 0.7127,
276
  "step": 380
277
  },
278
  {
279
  "epoch": 0.02496,
280
- "grad_norm": 1.8862844705581665,
281
- "learning_rate": 4.979200000000001e-07,
282
- "loss": 0.7153,
283
  "step": 390
284
  },
285
  {
286
  "epoch": 0.0256,
287
- "grad_norm": 1.9264376163482666,
288
- "learning_rate": 5.107200000000001e-07,
289
- "loss": 0.7109,
290
  "step": 400
291
  },
292
  {
293
  "epoch": 0.02624,
294
- "grad_norm": 1.4058727025985718,
295
- "learning_rate": 5.235200000000001e-07,
296
- "loss": 0.705,
297
  "step": 410
298
  },
299
  {
300
  "epoch": 0.02688,
301
- "grad_norm": 1.519445776939392,
302
- "learning_rate": 5.363200000000001e-07,
303
- "loss": 0.7131,
304
  "step": 420
305
  },
306
  {
307
  "epoch": 0.02752,
308
- "grad_norm": 1.6636698246002197,
309
- "learning_rate": 5.491200000000001e-07,
310
- "loss": 0.6916,
311
  "step": 430
312
  },
313
  {
314
  "epoch": 0.02816,
315
- "grad_norm": 1.5472590923309326,
316
- "learning_rate": 5.6192e-07,
317
- "loss": 0.705,
318
  "step": 440
319
  },
320
  {
321
  "epoch": 0.0288,
322
- "grad_norm": 1.4896206855773926,
323
- "learning_rate": 5.747200000000001e-07,
324
- "loss": 0.7046,
325
  "step": 450
326
  },
327
  {
328
  "epoch": 0.02944,
329
- "grad_norm": 2.2565503120422363,
330
- "learning_rate": 5.8752e-07,
331
- "loss": 0.7009,
332
  "step": 460
333
  },
334
  {
335
  "epoch": 0.03008,
336
- "grad_norm": 2.017638683319092,
337
- "learning_rate": 6.0032e-07,
338
- "loss": 0.7058,
339
  "step": 470
340
  },
341
  {
342
  "epoch": 0.03072,
343
- "grad_norm": 1.3399696350097656,
344
- "learning_rate": 6.1312e-07,
345
- "loss": 0.7003,
346
  "step": 480
347
  },
348
  {
349
  "epoch": 0.03136,
350
- "grad_norm": 1.3090866804122925,
351
- "learning_rate": 6.2592e-07,
352
- "loss": 0.7067,
353
  "step": 490
354
  },
355
  {
356
  "epoch": 0.032,
357
- "grad_norm": 1.4199142456054688,
358
- "learning_rate": 6.3872e-07,
359
- "loss": 0.7008,
360
  "step": 500
361
  },
362
  {
363
  "epoch": 0.03264,
364
- "grad_norm": 1.7174904346466064,
365
- "learning_rate": 6.515200000000001e-07,
366
- "loss": 0.7003,
367
  "step": 510
368
  },
369
  {
370
  "epoch": 0.03328,
371
- "grad_norm": 1.2983943223953247,
372
- "learning_rate": 6.643200000000001e-07,
373
- "loss": 0.698,
374
  "step": 520
375
  },
376
  {
377
  "epoch": 0.03392,
378
- "grad_norm": 1.8224154710769653,
379
- "learning_rate": 6.7712e-07,
380
- "loss": 0.7047,
381
  "step": 530
382
  },
383
  {
384
  "epoch": 0.03456,
385
- "grad_norm": 1.3605278730392456,
386
- "learning_rate": 6.899200000000001e-07,
387
- "loss": 0.6974,
388
  "step": 540
389
  },
390
  {
391
  "epoch": 0.0352,
392
- "grad_norm": 1.4932376146316528,
393
- "learning_rate": 7.027200000000001e-07,
394
- "loss": 0.6918,
395
  "step": 550
396
  },
397
  {
398
  "epoch": 0.03584,
399
- "grad_norm": 1.2169368267059326,
400
- "learning_rate": 7.155200000000001e-07,
401
- "loss": 0.6996,
402
  "step": 560
403
  },
404
  {
405
  "epoch": 0.03648,
406
- "grad_norm": 1.5690464973449707,
407
- "learning_rate": 7.2832e-07,
408
- "loss": 0.6942,
409
  "step": 570
410
  },
411
  {
412
  "epoch": 0.03712,
413
- "grad_norm": 1.541991949081421,
414
- "learning_rate": 7.4112e-07,
415
- "loss": 0.6973,
416
  "step": 580
417
  },
418
  {
419
  "epoch": 0.03776,
420
- "grad_norm": 1.7749661207199097,
421
- "learning_rate": 7.5392e-07,
422
- "loss": 0.6865,
423
  "step": 590
424
  },
425
  {
426
  "epoch": 0.0384,
427
- "grad_norm": 1.2169281244277954,
428
- "learning_rate": 7.667200000000001e-07,
429
- "loss": 0.6876,
430
  "step": 600
431
  }
432
  ],
433
  "logging_steps": 10,
434
- "max_steps": 156250,
435
  "num_input_tokens_seen": 0,
436
- "num_train_epochs": 10,
437
  "save_steps": 200,
438
  "stateful_callbacks": {
439
  "EarlyStoppingCallback": {
@@ -456,7 +456,7 @@
456
  "attributes": {}
457
  }
458
  },
459
- "total_flos": 5051732262912000.0,
460
  "train_batch_size": 32,
461
  "trial_name": null,
462
  "trial_params": null
 
151
  },
152
  {
153
  "epoch": 0.01344,
154
+ "grad_norm": 141396.296875,
155
+ "learning_rate": 5.350742447516642e-07,
156
+ "loss": 0.7217,
157
  "step": 210
158
  },
159
  {
160
  "epoch": 0.01408,
161
+ "grad_norm": 102339.1640625,
162
+ "learning_rate": 5.606758832565284e-07,
163
+ "loss": 0.7215,
164
  "step": 220
165
  },
166
  {
167
  "epoch": 0.01472,
168
+ "grad_norm": 134052.9375,
169
+ "learning_rate": 5.862775217613928e-07,
170
+ "loss": 0.7115,
171
  "step": 230
172
  },
173
  {
174
  "epoch": 0.01536,
175
+ "grad_norm": 87181.984375,
176
+ "learning_rate": 6.118791602662571e-07,
177
+ "loss": 0.7241,
178
  "step": 240
179
  },
180
  {
181
  "epoch": 0.016,
182
+ "grad_norm": 100231.328125,
183
+ "learning_rate": 6.374807987711214e-07,
184
+ "loss": 0.71,
185
  "step": 250
186
  },
187
  {
188
  "epoch": 0.01664,
189
+ "grad_norm": 136721.484375,
190
+ "learning_rate": 6.630824372759858e-07,
191
+ "loss": 0.7188,
192
  "step": 260
193
  },
194
  {
195
  "epoch": 0.01728,
196
+ "grad_norm": 115868.8125,
197
+ "learning_rate": 6.8868407578085e-07,
198
+ "loss": 0.7199,
199
  "step": 270
200
  },
201
  {
202
  "epoch": 0.01792,
203
+ "grad_norm": 70205.1484375,
204
+ "learning_rate": 7.142857142857143e-07,
205
+ "loss": 0.7299,
206
  "step": 280
207
  },
208
  {
209
  "epoch": 0.01856,
210
+ "grad_norm": 98926.4453125,
211
+ "learning_rate": 7.398873527905787e-07,
212
+ "loss": 0.7159,
213
  "step": 290
214
  },
215
  {
216
  "epoch": 0.0192,
217
+ "grad_norm": 134108.140625,
218
+ "learning_rate": 7.65488991295443e-07,
219
+ "loss": 0.7122,
220
  "step": 300
221
  },
222
  {
223
  "epoch": 0.01984,
224
+ "grad_norm": 103719.140625,
225
+ "learning_rate": 7.910906298003073e-07,
226
+ "loss": 0.7185,
227
  "step": 310
228
  },
229
  {
230
  "epoch": 0.02048,
231
+ "grad_norm": 85624.953125,
232
+ "learning_rate": 8.166922683051716e-07,
233
+ "loss": 0.718,
234
  "step": 320
235
  },
236
  {
237
  "epoch": 0.02112,
238
+ "grad_norm": 138824.15625,
239
+ "learning_rate": 8.422939068100359e-07,
240
+ "loss": 0.713,
241
  "step": 330
242
  },
243
  {
244
  "epoch": 0.02176,
245
+ "grad_norm": 73629.0859375,
246
+ "learning_rate": 8.678955453149002e-07,
247
+ "loss": 0.7186,
248
  "step": 340
249
  },
250
  {
251
  "epoch": 0.0224,
252
+ "grad_norm": 132493.0,
253
+ "learning_rate": 8.934971838197646e-07,
254
+ "loss": 0.7133,
255
  "step": 350
256
  },
257
  {
258
  "epoch": 0.02304,
259
+ "grad_norm": 85223.625,
260
+ "learning_rate": 9.190988223246289e-07,
261
+ "loss": 0.7124,
262
  "step": 360
263
  },
264
  {
265
  "epoch": 0.02368,
266
+ "grad_norm": 77868.78125,
267
+ "learning_rate": 9.447004608294931e-07,
268
+ "loss": 0.7058,
269
  "step": 370
270
  },
271
  {
272
  "epoch": 0.02432,
273
+ "grad_norm": 75874.3046875,
274
+ "learning_rate": 9.703020993343575e-07,
275
+ "loss": 0.7139,
276
  "step": 380
277
  },
278
  {
279
  "epoch": 0.02496,
280
+ "grad_norm": 151937.703125,
281
+ "learning_rate": 9.959037378392218e-07,
282
+ "loss": 0.713,
283
  "step": 390
284
  },
285
  {
286
  "epoch": 0.0256,
287
+ "grad_norm": 161711.671875,
288
+ "learning_rate": 1.021505376344086e-06,
289
+ "loss": 0.7137,
290
  "step": 400
291
  },
292
  {
293
  "epoch": 0.02624,
294
+ "grad_norm": 90800.234375,
295
+ "learning_rate": 1.0471070148489503e-06,
296
+ "loss": 0.7091,
297
  "step": 410
298
  },
299
  {
300
  "epoch": 0.02688,
301
+ "grad_norm": 82131.34375,
302
+ "learning_rate": 1.0727086533538148e-06,
303
+ "loss": 0.7098,
304
  "step": 420
305
  },
306
  {
307
  "epoch": 0.02752,
308
+ "grad_norm": 92818.9140625,
309
+ "learning_rate": 1.0983102918586791e-06,
310
+ "loss": 0.7099,
311
  "step": 430
312
  },
313
  {
314
  "epoch": 0.02816,
315
+ "grad_norm": 88555.5078125,
316
+ "learning_rate": 1.1239119303635434e-06,
317
+ "loss": 0.7086,
318
  "step": 440
319
  },
320
  {
321
  "epoch": 0.0288,
322
+ "grad_norm": 73428.6015625,
323
+ "learning_rate": 1.1495135688684077e-06,
324
+ "loss": 0.7117,
325
  "step": 450
326
  },
327
  {
328
  "epoch": 0.02944,
329
+ "grad_norm": 128938.7421875,
330
+ "learning_rate": 1.175115207373272e-06,
331
+ "loss": 0.7182,
332
  "step": 460
333
  },
334
  {
335
  "epoch": 0.03008,
336
+ "grad_norm": 102742.3359375,
337
+ "learning_rate": 1.2007168458781362e-06,
338
+ "loss": 0.7108,
339
  "step": 470
340
  },
341
  {
342
  "epoch": 0.03072,
343
+ "grad_norm": 73825.8125,
344
+ "learning_rate": 1.2263184843830007e-06,
345
+ "loss": 0.7087,
346
  "step": 480
347
  },
348
  {
349
  "epoch": 0.03136,
350
+ "grad_norm": 110930.75,
351
+ "learning_rate": 1.251920122887865e-06,
352
+ "loss": 0.7232,
353
  "step": 490
354
  },
355
  {
356
  "epoch": 0.032,
357
+ "grad_norm": 95068.84375,
358
+ "learning_rate": 1.2775217613927293e-06,
359
+ "loss": 0.703,
360
  "step": 500
361
  },
362
  {
363
  "epoch": 0.03264,
364
+ "grad_norm": 118731.9296875,
365
+ "learning_rate": 1.3031233998975938e-06,
366
+ "loss": 0.7063,
367
  "step": 510
368
  },
369
  {
370
  "epoch": 0.03328,
371
+ "grad_norm": 80511.828125,
372
+ "learning_rate": 1.3287250384024578e-06,
373
+ "loss": 0.7143,
374
  "step": 520
375
  },
376
  {
377
  "epoch": 0.03392,
378
+ "grad_norm": 84864.484375,
379
+ "learning_rate": 1.354326676907322e-06,
380
+ "loss": 0.7055,
381
  "step": 530
382
  },
383
  {
384
  "epoch": 0.03456,
385
+ "grad_norm": 107800.109375,
386
+ "learning_rate": 1.3799283154121864e-06,
387
+ "loss": 0.7119,
388
  "step": 540
389
  },
390
  {
391
  "epoch": 0.0352,
392
+ "grad_norm": 83667.671875,
393
+ "learning_rate": 1.4055299539170509e-06,
394
+ "loss": 0.7082,
395
  "step": 550
396
  },
397
  {
398
  "epoch": 0.03584,
399
+ "grad_norm": 75656.4140625,
400
+ "learning_rate": 1.4311315924219151e-06,
401
+ "loss": 0.7062,
402
  "step": 560
403
  },
404
  {
405
  "epoch": 0.03648,
406
+ "grad_norm": 79985.875,
407
+ "learning_rate": 1.4567332309267796e-06,
408
+ "loss": 0.7155,
409
  "step": 570
410
  },
411
  {
412
  "epoch": 0.03712,
413
+ "grad_norm": 76334.078125,
414
+ "learning_rate": 1.4823348694316437e-06,
415
+ "loss": 0.7075,
416
  "step": 580
417
  },
418
  {
419
  "epoch": 0.03776,
420
+ "grad_norm": 140764.03125,
421
+ "learning_rate": 1.507936507936508e-06,
422
+ "loss": 0.7065,
423
  "step": 590
424
  },
425
  {
426
  "epoch": 0.0384,
427
+ "grad_norm": 100877.296875,
428
+ "learning_rate": 1.5335381464413722e-06,
429
+ "loss": 0.7096,
430
  "step": 600
431
  }
432
  ],
433
  "logging_steps": 10,
434
+ "max_steps": 78125,
435
  "num_input_tokens_seen": 0,
436
+ "num_train_epochs": 5,
437
  "save_steps": 200,
438
  "stateful_callbacks": {
439
  "EarlyStoppingCallback": {
 
456
  "attributes": {}
457
  }
458
  },
459
+ "total_flos": 5049397152295680.0,
460
  "train_batch_size": 32,
461
  "trial_name": null,
462
  "trial_params": null
graphcodebert-robust/checkpoint-600/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ff659b85d84ec0bae53596bc271ba773db9c463626db0f13fd8e747f433dad4
3
  size 5841
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5fc4023adc281644437a690ea6a6001846d7762699cd428d4ad38e1888076db
3
  size 5841
graphcodebert-robust/checkpoint-800/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8fe33dfcd47347ccd7588a3d6ffc124e8af6931e6cc5edf7eff42f416eb814e9
3
  size 498612824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d19fdc7a5fa21c91052f15414ec14e1da4bbc85f75aa66510c1c463b2f14e2f6
3
  size 498612824
graphcodebert-robust/checkpoint-800/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33bc19163178e07929f74c9874b8faa2235856319b19a9f384fc4e2fcd84fe4c
3
  size 4741923
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddec1d294331a984f4091595913e06b171ba550334d359ca9c07a294409ad9c1
3
  size 4741923
graphcodebert-robust/checkpoint-800/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08e89a90a52ab59c17ebc709062022104092de74a5a21eaffb6603a247770d61
3
- size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36c90ae3575630687b6a7d64bf93dded50adb1dbab4b74db0c9cdd2945f93577
3
+ size 14581
graphcodebert-robust/checkpoint-800/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fef122931c86c2d2736773be787da21ac6460d41580735381e953556fb410be
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:124625e167eb28acbfc793cfcb3e8a08b32e7fea06501462bc9e420a5e1beb2a
3
  size 1383
graphcodebert-robust/checkpoint-800/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c662a8bb63968a394648b28695827df4fdf4db740c41d1df9edbc67160fd052
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63a6e34118894da77328dc4487914a7b9b9dbb71f404e8060d27ed90073c6190
3
  size 1465
graphcodebert-robust/checkpoint-800/tokenizer.json CHANGED
@@ -1,11 +1,6 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 512,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
  "padding": null,
10
  "added_tokens": [
11
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
graphcodebert-robust/checkpoint-800/trainer_state.json CHANGED
@@ -151,429 +151,429 @@
151
  },
152
  {
153
  "epoch": 0.01344,
154
- "grad_norm": 2.1326472759246826,
155
- "learning_rate": 2.6752000000000006e-07,
156
- "loss": 0.7305,
157
  "step": 210
158
  },
159
  {
160
  "epoch": 0.01408,
161
- "grad_norm": 2.21703839302063,
162
- "learning_rate": 2.8032e-07,
163
- "loss": 0.7167,
164
  "step": 220
165
  },
166
  {
167
  "epoch": 0.01472,
168
- "grad_norm": 1.6385700702667236,
169
- "learning_rate": 2.9312e-07,
170
- "loss": 0.7209,
171
  "step": 230
172
  },
173
  {
174
  "epoch": 0.01536,
175
- "grad_norm": 1.4293471574783325,
176
- "learning_rate": 3.0592000000000003e-07,
177
- "loss": 0.722,
178
  "step": 240
179
  },
180
  {
181
  "epoch": 0.016,
182
- "grad_norm": 2.1437904834747314,
183
- "learning_rate": 3.1872e-07,
184
- "loss": 0.717,
185
  "step": 250
186
  },
187
  {
188
  "epoch": 0.01664,
189
- "grad_norm": 2.014806032180786,
190
- "learning_rate": 3.3152000000000005e-07,
191
- "loss": 0.7182,
192
  "step": 260
193
  },
194
  {
195
  "epoch": 0.01728,
196
- "grad_norm": 1.7216386795043945,
197
- "learning_rate": 3.4432e-07,
198
- "loss": 0.7253,
199
  "step": 270
200
  },
201
  {
202
  "epoch": 0.01792,
203
- "grad_norm": 1.4267009496688843,
204
- "learning_rate": 3.5712e-07,
205
- "loss": 0.7189,
206
  "step": 280
207
  },
208
  {
209
  "epoch": 0.01856,
210
- "grad_norm": 2.222503185272217,
211
- "learning_rate": 3.6992e-07,
212
- "loss": 0.7198,
213
  "step": 290
214
  },
215
  {
216
  "epoch": 0.0192,
217
- "grad_norm": 1.578922986984253,
218
- "learning_rate": 3.8272000000000003e-07,
219
- "loss": 0.717,
220
  "step": 300
221
  },
222
  {
223
  "epoch": 0.01984,
224
- "grad_norm": 1.719905972480774,
225
- "learning_rate": 3.9552e-07,
226
- "loss": 0.709,
227
  "step": 310
228
  },
229
  {
230
  "epoch": 0.02048,
231
- "grad_norm": 1.4473963975906372,
232
- "learning_rate": 4.0832000000000005e-07,
233
- "loss": 0.7215,
234
  "step": 320
235
  },
236
  {
237
  "epoch": 0.02112,
238
- "grad_norm": 2.1639790534973145,
239
- "learning_rate": 4.2112e-07,
240
- "loss": 0.7175,
241
  "step": 330
242
  },
243
  {
244
  "epoch": 0.02176,
245
- "grad_norm": 1.2387958765029907,
246
- "learning_rate": 4.3392e-07,
247
- "loss": 0.7129,
248
  "step": 340
249
  },
250
  {
251
  "epoch": 0.0224,
252
- "grad_norm": 2.2797842025756836,
253
- "learning_rate": 4.4672000000000007e-07,
254
- "loss": 0.7159,
255
  "step": 350
256
  },
257
  {
258
  "epoch": 0.02304,
259
- "grad_norm": 1.5692473649978638,
260
- "learning_rate": 4.5952000000000003e-07,
261
- "loss": 0.7161,
262
  "step": 360
263
  },
264
  {
265
  "epoch": 0.02368,
266
- "grad_norm": 1.4270817041397095,
267
- "learning_rate": 4.723200000000001e-07,
268
- "loss": 0.7114,
269
  "step": 370
270
  },
271
  {
272
  "epoch": 0.02432,
273
- "grad_norm": 1.4091335535049438,
274
- "learning_rate": 4.8512e-07,
275
- "loss": 0.7127,
276
  "step": 380
277
  },
278
  {
279
  "epoch": 0.02496,
280
- "grad_norm": 1.8862844705581665,
281
- "learning_rate": 4.979200000000001e-07,
282
- "loss": 0.7153,
283
  "step": 390
284
  },
285
  {
286
  "epoch": 0.0256,
287
- "grad_norm": 1.9264376163482666,
288
- "learning_rate": 5.107200000000001e-07,
289
- "loss": 0.7109,
290
  "step": 400
291
  },
292
  {
293
  "epoch": 0.02624,
294
- "grad_norm": 1.4058727025985718,
295
- "learning_rate": 5.235200000000001e-07,
296
- "loss": 0.705,
297
  "step": 410
298
  },
299
  {
300
  "epoch": 0.02688,
301
- "grad_norm": 1.519445776939392,
302
- "learning_rate": 5.363200000000001e-07,
303
- "loss": 0.7131,
304
  "step": 420
305
  },
306
  {
307
  "epoch": 0.02752,
308
- "grad_norm": 1.6636698246002197,
309
- "learning_rate": 5.491200000000001e-07,
310
- "loss": 0.6916,
311
  "step": 430
312
  },
313
  {
314
  "epoch": 0.02816,
315
- "grad_norm": 1.5472590923309326,
316
- "learning_rate": 5.6192e-07,
317
- "loss": 0.705,
318
  "step": 440
319
  },
320
  {
321
  "epoch": 0.0288,
322
- "grad_norm": 1.4896206855773926,
323
- "learning_rate": 5.747200000000001e-07,
324
- "loss": 0.7046,
325
  "step": 450
326
  },
327
  {
328
  "epoch": 0.02944,
329
- "grad_norm": 2.2565503120422363,
330
- "learning_rate": 5.8752e-07,
331
- "loss": 0.7009,
332
  "step": 460
333
  },
334
  {
335
  "epoch": 0.03008,
336
- "grad_norm": 2.017638683319092,
337
- "learning_rate": 6.0032e-07,
338
- "loss": 0.7058,
339
  "step": 470
340
  },
341
  {
342
  "epoch": 0.03072,
343
- "grad_norm": 1.3399696350097656,
344
- "learning_rate": 6.1312e-07,
345
- "loss": 0.7003,
346
  "step": 480
347
  },
348
  {
349
  "epoch": 0.03136,
350
- "grad_norm": 1.3090866804122925,
351
- "learning_rate": 6.2592e-07,
352
- "loss": 0.7067,
353
  "step": 490
354
  },
355
  {
356
  "epoch": 0.032,
357
- "grad_norm": 1.4199142456054688,
358
- "learning_rate": 6.3872e-07,
359
- "loss": 0.7008,
360
  "step": 500
361
  },
362
  {
363
  "epoch": 0.03264,
364
- "grad_norm": 1.7174904346466064,
365
- "learning_rate": 6.515200000000001e-07,
366
- "loss": 0.7003,
367
  "step": 510
368
  },
369
  {
370
  "epoch": 0.03328,
371
- "grad_norm": 1.2983943223953247,
372
- "learning_rate": 6.643200000000001e-07,
373
- "loss": 0.698,
374
  "step": 520
375
  },
376
  {
377
  "epoch": 0.03392,
378
- "grad_norm": 1.8224154710769653,
379
- "learning_rate": 6.7712e-07,
380
- "loss": 0.7047,
381
  "step": 530
382
  },
383
  {
384
  "epoch": 0.03456,
385
- "grad_norm": 1.3605278730392456,
386
- "learning_rate": 6.899200000000001e-07,
387
- "loss": 0.6974,
388
  "step": 540
389
  },
390
  {
391
  "epoch": 0.0352,
392
- "grad_norm": 1.4932376146316528,
393
- "learning_rate": 7.027200000000001e-07,
394
- "loss": 0.6918,
395
  "step": 550
396
  },
397
  {
398
  "epoch": 0.03584,
399
- "grad_norm": 1.2169368267059326,
400
- "learning_rate": 7.155200000000001e-07,
401
- "loss": 0.6996,
402
  "step": 560
403
  },
404
  {
405
  "epoch": 0.03648,
406
- "grad_norm": 1.5690464973449707,
407
- "learning_rate": 7.2832e-07,
408
- "loss": 0.6942,
409
  "step": 570
410
  },
411
  {
412
  "epoch": 0.03712,
413
- "grad_norm": 1.541991949081421,
414
- "learning_rate": 7.4112e-07,
415
- "loss": 0.6973,
416
  "step": 580
417
  },
418
  {
419
  "epoch": 0.03776,
420
- "grad_norm": 1.7749661207199097,
421
- "learning_rate": 7.5392e-07,
422
- "loss": 0.6865,
423
  "step": 590
424
  },
425
  {
426
  "epoch": 0.0384,
427
- "grad_norm": 1.2169281244277954,
428
- "learning_rate": 7.667200000000001e-07,
429
- "loss": 0.6876,
430
  "step": 600
431
  },
432
  {
433
  "epoch": 0.03904,
434
- "grad_norm": 1.992397427558899,
435
- "learning_rate": 7.7952e-07,
436
- "loss": 0.6882,
437
  "step": 610
438
  },
439
  {
440
  "epoch": 0.03968,
441
- "grad_norm": 1.5149959325790405,
442
- "learning_rate": 7.9232e-07,
443
- "loss": 0.6838,
444
  "step": 620
445
  },
446
  {
447
  "epoch": 0.04032,
448
- "grad_norm": 1.707815170288086,
449
- "learning_rate": 8.0512e-07,
450
- "loss": 0.6799,
451
  "step": 630
452
  },
453
  {
454
  "epoch": 0.04096,
455
- "grad_norm": 1.2718374729156494,
456
- "learning_rate": 8.179200000000001e-07,
457
- "loss": 0.6807,
458
  "step": 640
459
  },
460
  {
461
  "epoch": 0.0416,
462
- "grad_norm": 1.4209625720977783,
463
- "learning_rate": 8.3072e-07,
464
- "loss": 0.6892,
465
  "step": 650
466
  },
467
  {
468
  "epoch": 0.04224,
469
- "grad_norm": 1.295621633529663,
470
- "learning_rate": 8.435200000000001e-07,
471
- "loss": 0.6827,
472
  "step": 660
473
  },
474
  {
475
  "epoch": 0.04288,
476
- "grad_norm": 1.1997125148773193,
477
- "learning_rate": 8.563200000000001e-07,
478
- "loss": 0.6815,
479
  "step": 670
480
  },
481
  {
482
  "epoch": 0.04352,
483
- "grad_norm": 1.377269983291626,
484
- "learning_rate": 8.691200000000001e-07,
485
- "loss": 0.6848,
486
  "step": 680
487
  },
488
  {
489
  "epoch": 0.04416,
490
- "grad_norm": 1.3606722354888916,
491
- "learning_rate": 8.819200000000001e-07,
492
- "loss": 0.6812,
493
  "step": 690
494
  },
495
  {
496
  "epoch": 0.0448,
497
- "grad_norm": 1.416563630104065,
498
- "learning_rate": 8.9472e-07,
499
- "loss": 0.6844,
500
  "step": 700
501
  },
502
  {
503
  "epoch": 0.04544,
504
- "grad_norm": 1.6803165674209595,
505
- "learning_rate": 9.0752e-07,
506
- "loss": 0.6782,
507
  "step": 710
508
  },
509
  {
510
  "epoch": 0.04608,
511
- "grad_norm": 1.4515128135681152,
512
- "learning_rate": 9.203200000000001e-07,
513
- "loss": 0.671,
514
  "step": 720
515
  },
516
  {
517
  "epoch": 0.04672,
518
- "grad_norm": 1.3443140983581543,
519
- "learning_rate": 9.331200000000001e-07,
520
- "loss": 0.6754,
521
  "step": 730
522
  },
523
  {
524
  "epoch": 0.04736,
525
- "grad_norm": 1.8091343641281128,
526
- "learning_rate": 9.4592e-07,
527
- "loss": 0.6651,
528
  "step": 740
529
  },
530
  {
531
  "epoch": 0.048,
532
- "grad_norm": 1.9745153188705444,
533
- "learning_rate": 9.587200000000002e-07,
534
- "loss": 0.6724,
535
  "step": 750
536
  },
537
  {
538
  "epoch": 0.04864,
539
- "grad_norm": 1.2002370357513428,
540
- "learning_rate": 9.7152e-07,
541
- "loss": 0.6734,
542
  "step": 760
543
  },
544
  {
545
  "epoch": 0.04928,
546
- "grad_norm": 1.4024569988250732,
547
- "learning_rate": 9.843200000000002e-07,
548
- "loss": 0.6735,
549
  "step": 770
550
  },
551
  {
552
  "epoch": 0.04992,
553
- "grad_norm": 1.3709276914596558,
554
- "learning_rate": 9.9712e-07,
555
- "loss": 0.6643,
556
  "step": 780
557
  },
558
  {
559
  "epoch": 0.05056,
560
- "grad_norm": 2.260002851486206,
561
- "learning_rate": 1.00992e-06,
562
- "loss": 0.6634,
563
  "step": 790
564
  },
565
  {
566
  "epoch": 0.0512,
567
- "grad_norm": 1.0970172882080078,
568
- "learning_rate": 1.02272e-06,
569
- "loss": 0.6721,
570
  "step": 800
571
  }
572
  ],
573
  "logging_steps": 10,
574
- "max_steps": 156250,
575
  "num_input_tokens_seen": 0,
576
- "num_train_epochs": 10,
577
  "save_steps": 200,
578
  "stateful_callbacks": {
579
  "EarlyStoppingCallback": {
@@ -596,7 +596,7 @@
596
  "attributes": {}
597
  }
598
  },
599
- "total_flos": 6735643017216000.0,
600
  "train_batch_size": 32,
601
  "trial_name": null,
602
  "trial_params": null
 
151
  },
152
  {
153
  "epoch": 0.01344,
154
+ "grad_norm": 141396.296875,
155
+ "learning_rate": 5.350742447516642e-07,
156
+ "loss": 0.7217,
157
  "step": 210
158
  },
159
  {
160
  "epoch": 0.01408,
161
+ "grad_norm": 102339.1640625,
162
+ "learning_rate": 5.606758832565284e-07,
163
+ "loss": 0.7215,
164
  "step": 220
165
  },
166
  {
167
  "epoch": 0.01472,
168
+ "grad_norm": 134052.9375,
169
+ "learning_rate": 5.862775217613928e-07,
170
+ "loss": 0.7115,
171
  "step": 230
172
  },
173
  {
174
  "epoch": 0.01536,
175
+ "grad_norm": 87181.984375,
176
+ "learning_rate": 6.118791602662571e-07,
177
+ "loss": 0.7241,
178
  "step": 240
179
  },
180
  {
181
  "epoch": 0.016,
182
+ "grad_norm": 100231.328125,
183
+ "learning_rate": 6.374807987711214e-07,
184
+ "loss": 0.71,
185
  "step": 250
186
  },
187
  {
188
  "epoch": 0.01664,
189
+ "grad_norm": 136721.484375,
190
+ "learning_rate": 6.630824372759858e-07,
191
+ "loss": 0.7188,
192
  "step": 260
193
  },
194
  {
195
  "epoch": 0.01728,
196
+ "grad_norm": 115868.8125,
197
+ "learning_rate": 6.8868407578085e-07,
198
+ "loss": 0.7199,
199
  "step": 270
200
  },
201
  {
202
  "epoch": 0.01792,
203
+ "grad_norm": 70205.1484375,
204
+ "learning_rate": 7.142857142857143e-07,
205
+ "loss": 0.7299,
206
  "step": 280
207
  },
208
  {
209
  "epoch": 0.01856,
210
+ "grad_norm": 98926.4453125,
211
+ "learning_rate": 7.398873527905787e-07,
212
+ "loss": 0.7159,
213
  "step": 290
214
  },
215
  {
216
  "epoch": 0.0192,
217
+ "grad_norm": 134108.140625,
218
+ "learning_rate": 7.65488991295443e-07,
219
+ "loss": 0.7122,
220
  "step": 300
221
  },
222
  {
223
  "epoch": 0.01984,
224
+ "grad_norm": 103719.140625,
225
+ "learning_rate": 7.910906298003073e-07,
226
+ "loss": 0.7185,
227
  "step": 310
228
  },
229
  {
230
  "epoch": 0.02048,
231
+ "grad_norm": 85624.953125,
232
+ "learning_rate": 8.166922683051716e-07,
233
+ "loss": 0.718,
234
  "step": 320
235
  },
236
  {
237
  "epoch": 0.02112,
238
+ "grad_norm": 138824.15625,
239
+ "learning_rate": 8.422939068100359e-07,
240
+ "loss": 0.713,
241
  "step": 330
242
  },
243
  {
244
  "epoch": 0.02176,
245
+ "grad_norm": 73629.0859375,
246
+ "learning_rate": 8.678955453149002e-07,
247
+ "loss": 0.7186,
248
  "step": 340
249
  },
250
  {
251
  "epoch": 0.0224,
252
+ "grad_norm": 132493.0,
253
+ "learning_rate": 8.934971838197646e-07,
254
+ "loss": 0.7133,
255
  "step": 350
256
  },
257
  {
258
  "epoch": 0.02304,
259
+ "grad_norm": 85223.625,
260
+ "learning_rate": 9.190988223246289e-07,
261
+ "loss": 0.7124,
262
  "step": 360
263
  },
264
  {
265
  "epoch": 0.02368,
266
+ "grad_norm": 77868.78125,
267
+ "learning_rate": 9.447004608294931e-07,
268
+ "loss": 0.7058,
269
  "step": 370
270
  },
271
  {
272
  "epoch": 0.02432,
273
+ "grad_norm": 75874.3046875,
274
+ "learning_rate": 9.703020993343575e-07,
275
+ "loss": 0.7139,
276
  "step": 380
277
  },
278
  {
279
  "epoch": 0.02496,
280
+ "grad_norm": 151937.703125,
281
+ "learning_rate": 9.959037378392218e-07,
282
+ "loss": 0.713,
283
  "step": 390
284
  },
285
  {
286
  "epoch": 0.0256,
287
+ "grad_norm": 161711.671875,
288
+ "learning_rate": 1.021505376344086e-06,
289
+ "loss": 0.7137,
290
  "step": 400
291
  },
292
  {
293
  "epoch": 0.02624,
294
+ "grad_norm": 90800.234375,
295
+ "learning_rate": 1.0471070148489503e-06,
296
+ "loss": 0.7091,
297
  "step": 410
298
  },
299
  {
300
  "epoch": 0.02688,
301
+ "grad_norm": 82131.34375,
302
+ "learning_rate": 1.0727086533538148e-06,
303
+ "loss": 0.7098,
304
  "step": 420
305
  },
306
  {
307
  "epoch": 0.02752,
308
+ "grad_norm": 92818.9140625,
309
+ "learning_rate": 1.0983102918586791e-06,
310
+ "loss": 0.7099,
311
  "step": 430
312
  },
313
  {
314
  "epoch": 0.02816,
315
+ "grad_norm": 88555.5078125,
316
+ "learning_rate": 1.1239119303635434e-06,
317
+ "loss": 0.7086,
318
  "step": 440
319
  },
320
  {
321
  "epoch": 0.0288,
322
+ "grad_norm": 73428.6015625,
323
+ "learning_rate": 1.1495135688684077e-06,
324
+ "loss": 0.7117,
325
  "step": 450
326
  },
327
  {
328
  "epoch": 0.02944,
329
+ "grad_norm": 128938.7421875,
330
+ "learning_rate": 1.175115207373272e-06,
331
+ "loss": 0.7182,
332
  "step": 460
333
  },
334
  {
335
  "epoch": 0.03008,
336
+ "grad_norm": 102742.3359375,
337
+ "learning_rate": 1.2007168458781362e-06,
338
+ "loss": 0.7108,
339
  "step": 470
340
  },
341
  {
342
  "epoch": 0.03072,
343
+ "grad_norm": 73825.8125,
344
+ "learning_rate": 1.2263184843830007e-06,
345
+ "loss": 0.7087,
346
  "step": 480
347
  },
348
  {
349
  "epoch": 0.03136,
350
+ "grad_norm": 110930.75,
351
+ "learning_rate": 1.251920122887865e-06,
352
+ "loss": 0.7232,
353
  "step": 490
354
  },
355
  {
356
  "epoch": 0.032,
357
+ "grad_norm": 95068.84375,
358
+ "learning_rate": 1.2775217613927293e-06,
359
+ "loss": 0.703,
360
  "step": 500
361
  },
362
  {
363
  "epoch": 0.03264,
364
+ "grad_norm": 118731.9296875,
365
+ "learning_rate": 1.3031233998975938e-06,
366
+ "loss": 0.7063,
367
  "step": 510
368
  },
369
  {
370
  "epoch": 0.03328,
371
+ "grad_norm": 80511.828125,
372
+ "learning_rate": 1.3287250384024578e-06,
373
+ "loss": 0.7143,
374
  "step": 520
375
  },
376
  {
377
  "epoch": 0.03392,
378
+ "grad_norm": 84864.484375,
379
+ "learning_rate": 1.354326676907322e-06,
380
+ "loss": 0.7055,
381
  "step": 530
382
  },
383
  {
384
  "epoch": 0.03456,
385
+ "grad_norm": 107800.109375,
386
+ "learning_rate": 1.3799283154121864e-06,
387
+ "loss": 0.7119,
388
  "step": 540
389
  },
390
  {
391
  "epoch": 0.0352,
392
+ "grad_norm": 83667.671875,
393
+ "learning_rate": 1.4055299539170509e-06,
394
+ "loss": 0.7082,
395
  "step": 550
396
  },
397
  {
398
  "epoch": 0.03584,
399
+ "grad_norm": 75656.4140625,
400
+ "learning_rate": 1.4311315924219151e-06,
401
+ "loss": 0.7062,
402
  "step": 560
403
  },
404
  {
405
  "epoch": 0.03648,
406
+ "grad_norm": 79985.875,
407
+ "learning_rate": 1.4567332309267796e-06,
408
+ "loss": 0.7155,
409
  "step": 570
410
  },
411
  {
412
  "epoch": 0.03712,
413
+ "grad_norm": 76334.078125,
414
+ "learning_rate": 1.4823348694316437e-06,
415
+ "loss": 0.7075,
416
  "step": 580
417
  },
418
  {
419
  "epoch": 0.03776,
420
+ "grad_norm": 140764.03125,
421
+ "learning_rate": 1.507936507936508e-06,
422
+ "loss": 0.7065,
423
  "step": 590
424
  },
425
  {
426
  "epoch": 0.0384,
427
+ "grad_norm": 100877.296875,
428
+ "learning_rate": 1.5335381464413722e-06,
429
+ "loss": 0.7096,
430
  "step": 600
431
  },
432
  {
433
  "epoch": 0.03904,
434
+ "grad_norm": 104088.1171875,
435
+ "learning_rate": 1.5591397849462367e-06,
436
+ "loss": 0.6987,
437
  "step": 610
438
  },
439
  {
440
  "epoch": 0.03968,
441
+ "grad_norm": 80806.2265625,
442
+ "learning_rate": 1.584741423451101e-06,
443
+ "loss": 0.707,
444
  "step": 620
445
  },
446
  {
447
  "epoch": 0.04032,
448
+ "grad_norm": 109884.765625,
449
+ "learning_rate": 1.6103430619559655e-06,
450
+ "loss": 0.6991,
451
  "step": 630
452
  },
453
  {
454
  "epoch": 0.04096,
455
+ "grad_norm": 79944.890625,
456
+ "learning_rate": 1.6359447004608298e-06,
457
+ "loss": 0.7047,
458
  "step": 640
459
  },
460
  {
461
  "epoch": 0.0416,
462
+ "grad_norm": 93673.3828125,
463
+ "learning_rate": 1.6615463389656938e-06,
464
+ "loss": 0.6971,
465
  "step": 650
466
  },
467
  {
468
  "epoch": 0.04224,
469
+ "grad_norm": 76641.265625,
470
+ "learning_rate": 1.6871479774705581e-06,
471
+ "loss": 0.6957,
472
  "step": 660
473
  },
474
  {
475
  "epoch": 0.04288,
476
+ "grad_norm": 73583.5546875,
477
+ "learning_rate": 1.7127496159754226e-06,
478
+ "loss": 0.7028,
479
  "step": 670
480
  },
481
  {
482
  "epoch": 0.04352,
483
+ "grad_norm": 75177.9609375,
484
+ "learning_rate": 1.7383512544802869e-06,
485
+ "loss": 0.7012,
486
  "step": 680
487
  },
488
  {
489
  "epoch": 0.04416,
490
+ "grad_norm": 78340.8515625,
491
+ "learning_rate": 1.7639528929851512e-06,
492
+ "loss": 0.6987,
493
  "step": 690
494
  },
495
  {
496
  "epoch": 0.0448,
497
+ "grad_norm": 86004.1171875,
498
+ "learning_rate": 1.7895545314900157e-06,
499
+ "loss": 0.7061,
500
  "step": 700
501
  },
502
  {
503
  "epoch": 0.04544,
504
+ "grad_norm": 94212.0390625,
505
+ "learning_rate": 1.8151561699948797e-06,
506
+ "loss": 0.6993,
507
  "step": 710
508
  },
509
  {
510
  "epoch": 0.04608,
511
+ "grad_norm": 83918.2421875,
512
+ "learning_rate": 1.840757808499744e-06,
513
+ "loss": 0.7009,
514
  "step": 720
515
  },
516
  {
517
  "epoch": 0.04672,
518
+ "grad_norm": 68374.3125,
519
+ "learning_rate": 1.8663594470046085e-06,
520
+ "loss": 0.6964,
521
  "step": 730
522
  },
523
  {
524
  "epoch": 0.04736,
525
+ "grad_norm": 90348.78125,
526
+ "learning_rate": 1.8919610855094728e-06,
527
+ "loss": 0.7011,
528
  "step": 740
529
  },
530
  {
531
  "epoch": 0.048,
532
+ "grad_norm": 146658.0,
533
+ "learning_rate": 1.9175627240143373e-06,
534
+ "loss": 0.7003,
535
  "step": 750
536
  },
537
  {
538
  "epoch": 0.04864,
539
+ "grad_norm": 112037.1640625,
540
+ "learning_rate": 1.9431643625192015e-06,
541
+ "loss": 0.7051,
542
  "step": 760
543
  },
544
  {
545
  "epoch": 0.04928,
546
+ "grad_norm": 70628.625,
547
+ "learning_rate": 1.9687660010240654e-06,
548
+ "loss": 0.6923,
549
  "step": 770
550
  },
551
  {
552
  "epoch": 0.04992,
553
+ "grad_norm": 109922.125,
554
+ "learning_rate": 1.99436763952893e-06,
555
+ "loss": 0.6893,
556
  "step": 780
557
  },
558
  {
559
  "epoch": 0.05056,
560
+ "grad_norm": 135306.375,
561
+ "learning_rate": 2.0199692780337944e-06,
562
+ "loss": 0.7008,
563
  "step": 790
564
  },
565
  {
566
  "epoch": 0.0512,
567
+ "grad_norm": 82354.8046875,
568
+ "learning_rate": 2.0455709165386586e-06,
569
+ "loss": 0.705,
570
  "step": 800
571
  }
572
  ],
573
  "logging_steps": 10,
574
+ "max_steps": 78125,
575
  "num_input_tokens_seen": 0,
576
+ "num_train_epochs": 5,
577
  "save_steps": 200,
578
  "stateful_callbacks": {
579
  "EarlyStoppingCallback": {
 
596
  "attributes": {}
597
  }
598
  },
599
+ "total_flos": 6733094128867200.0,
600
  "train_batch_size": 32,
601
  "trial_name": null,
602
  "trial_params": null
graphcodebert-robust/checkpoint-800/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ff659b85d84ec0bae53596bc271ba773db9c463626db0f13fd8e747f433dad4
3
  size 5841
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5fc4023adc281644437a690ea6a6001846d7762699cd428d4ad38e1888076db
3
  size 5841
graphcodebert-robust/training.log CHANGED
@@ -1,10 +1,10 @@
1
- 2026-04-15 17:47:48,928 - INFO - train_pipeline - Logging to ./output_checkpoints/graphcodebert-robust/training.log
2
- 2026-04-15 17:47:48,933 - INFO - train_pipeline - Training config: TrainConfig(model_name='microsoft/graphcodebert-base', output_dir='./output_checkpoints/graphcodebert-robust', num_epochs=10, batch_size=32, learning_rate=2e-05, max_length=512, num_labels=2, use_wandb=True, freeze_base=True, loss_type='r-drop', focal_alpha=1.0, focal_gamma=2.0, r_drop_alpha=4.0, infonce_temperature=0.07, infonce_weight=0.5, seed=42, resume_from_checkpoint=None, label_smoothing=0.1, adversarial_epsilon=0.5, use_swa=False, swa_start_epoch=2, swa_lr=1e-05, data_augmentation=True, aug_rename_prob=0.3, aug_format_prob=0.3, device=device(type='cuda'))
3
- 2026-04-15 17:47:48,936 - INFO - train_pipeline - Loading model & tokenizer for 'microsoft/graphcodebert-base'
4
- 2026-04-15 17:47:51,171 - INFO - train_pipeline - Model placed on cuda
5
- 2026-04-15 17:47:51,174 - INFO - train_pipeline - Base model weights frozen – only classifier head will be trained.
6
- 2026-04-15 17:47:51,177 - INFO - train_pipeline - ===== Model Architecture =====
7
- 2026-04-15 17:47:51,180 - INFO - train_pipeline -
8
  RobertaForSequenceClassification(
9
  (roberta): RobertaModel(
10
  (embeddings): RobertaEmbeddings(
@@ -49,12 +49,12 @@ RobertaForSequenceClassification(
49
  (out_proj): Linear(in_features=768, out_features=2, bias=True)
50
  )
51
  )
52
- 2026-04-15 17:47:51,186 - INFO - train_pipeline - ===== Parameter Summary =====
53
- 2026-04-15 17:47:51,189 - INFO - train_pipeline - Total Parameters: 124,647,170
54
- 2026-04-15 17:47:51,191 - INFO - train_pipeline - Trainable Parameters: 592,130
55
- 2026-04-15 17:47:51,193 - INFO - train_pipeline - Non-trainable Parameters: 124,055,040
56
- 2026-04-15 17:47:51,195 - INFO - train_pipeline - ===== Tokenizer Summary =====
57
- 2026-04-15 17:47:51,224 - INFO - train_pipeline - Vocab size: 50265 | Special tokens: ['<s>', '</s>', '<unk>', '<pad>', '<mask>']
58
- 2026-04-15 17:47:51,227 - INFO - train_pipeline - ===== End of Architecture Log =====
59
- 2026-04-15 17:47:54,338 - INFO - train_pipeline - Data augmentation enabled (rename=0.3, format=0.3)
60
- 2026-04-15 17:47:54,374 - INFO - train_pipeline - === Starting training with robust regularisation ===
 
1
+ 2026-04-16 09:18:35,093 - INFO - train_pipeline - Logging to ./output_checkpoints/graphcodebert-robust/training.log
2
+ 2026-04-16 09:18:35,094 - INFO - train_pipeline - Training config: TrainConfig(model_name='microsoft/graphcodebert-base', output_dir='./output_checkpoints/graphcodebert-robust', num_epochs=5, batch_size=32, learning_rate=2e-05, max_length=512, num_labels=2, use_wandb=True, freeze_base=True, loss_type='r-drop', focal_alpha=1.0, focal_gamma=2.0, r_drop_alpha=4.0, infonce_temperature=0.07, infonce_weight=0.5, seed=42, resume_from_checkpoint='checkpoints/graphcodebert-robust/checkpoint-200', label_smoothing=0.1, adversarial_epsilon=0.5, use_swa=True, swa_start_epoch=2, swa_lr=1e-05, data_augmentation=True, aug_rename_prob=0.3, aug_format_prob=0.3, device=device(type='cuda'))
3
+ 2026-04-16 09:18:35,094 - INFO - train_pipeline - Loading model & tokenizer for 'microsoft/graphcodebert-base'
4
+ 2026-04-16 09:18:43,368 - INFO - train_pipeline - Model placed on cuda
5
+ 2026-04-16 09:18:43,371 - INFO - train_pipeline - Base model weights frozen – only classifier head will be trained.
6
+ 2026-04-16 09:18:43,372 - INFO - train_pipeline - ===== Model Architecture =====
7
+ 2026-04-16 09:18:43,375 - INFO - train_pipeline -
8
  RobertaForSequenceClassification(
9
  (roberta): RobertaModel(
10
  (embeddings): RobertaEmbeddings(
 
49
  (out_proj): Linear(in_features=768, out_features=2, bias=True)
50
  )
51
  )
52
+ 2026-04-16 09:18:43,377 - INFO - train_pipeline - ===== Parameter Summary =====
53
+ 2026-04-16 09:18:43,378 - INFO - train_pipeline - Total Parameters: 124,647,170
54
+ 2026-04-16 09:18:43,380 - INFO - train_pipeline - Trainable Parameters: 592,130
55
+ 2026-04-16 09:18:43,381 - INFO - train_pipeline - Non-trainable Parameters: 124,055,040
56
+ 2026-04-16 09:18:43,381 - INFO - train_pipeline - ===== Tokenizer Summary =====
57
+ 2026-04-16 09:18:43,409 - INFO - train_pipeline - Vocab size: 50265 | Special tokens: ['<s>', '</s>', '<unk>', '<pad>', '<mask>']
58
+ 2026-04-16 09:18:43,410 - INFO - train_pipeline - ===== End of Architecture Log =====
59
+ 2026-04-16 09:18:43,411 - INFO - train_pipeline - Data augmentation enabled (rename=0.3, format=0.3)
60
+ 2026-04-16 09:22:04,475 - INFO - train_pipeline - === Starting training with robust regularisation ===