dzungpham commited on
Commit
2a362eb
·
verified ·
1 Parent(s): f81d183

upload graphcodebert robust, best f1 score at 0.54 at robust checkpoint 200

Browse files
Files changed (40) hide show
  1. graphcodebert-robust/checkpoint-1200/config.json +28 -0
  2. graphcodebert-robust/checkpoint-1200/merges.txt +0 -0
  3. graphcodebert-robust/checkpoint-1200/model.safetensors +3 -0
  4. graphcodebert-robust/checkpoint-1200/optimizer.pt +3 -0
  5. graphcodebert-robust/checkpoint-1200/rng_state.pth +3 -0
  6. graphcodebert-robust/checkpoint-1200/scaler.pt +3 -0
  7. graphcodebert-robust/checkpoint-1200/scheduler.pt +3 -0
  8. graphcodebert-robust/checkpoint-1200/special_tokens_map.json +51 -0
  9. graphcodebert-robust/checkpoint-1200/tokenizer.json +0 -0
  10. graphcodebert-robust/checkpoint-1200/tokenizer_config.json +58 -0
  11. graphcodebert-robust/checkpoint-1200/trainer_state.json +893 -0
  12. graphcodebert-robust/checkpoint-1200/training_args.bin +3 -0
  13. graphcodebert-robust/checkpoint-1200/vocab.json +0 -0
  14. graphcodebert-robust/checkpoint-1400/config.json +28 -0
  15. graphcodebert-robust/checkpoint-1400/merges.txt +0 -0
  16. graphcodebert-robust/checkpoint-1400/model.safetensors +3 -0
  17. graphcodebert-robust/checkpoint-1400/optimizer.pt +3 -0
  18. graphcodebert-robust/checkpoint-1400/rng_state.pth +3 -0
  19. graphcodebert-robust/checkpoint-1400/scaler.pt +3 -0
  20. graphcodebert-robust/checkpoint-1400/scheduler.pt +3 -0
  21. graphcodebert-robust/checkpoint-1400/special_tokens_map.json +51 -0
  22. graphcodebert-robust/checkpoint-1400/tokenizer.json +0 -0
  23. graphcodebert-robust/checkpoint-1400/tokenizer_config.json +58 -0
  24. graphcodebert-robust/checkpoint-1400/trainer_state.json +1033 -0
  25. graphcodebert-robust/checkpoint-1400/training_args.bin +3 -0
  26. graphcodebert-robust/checkpoint-1400/vocab.json +0 -0
  27. graphcodebert-robust/checkpoint-1600/config.json +28 -0
  28. graphcodebert-robust/checkpoint-1600/merges.txt +0 -0
  29. graphcodebert-robust/checkpoint-1600/model.safetensors +3 -0
  30. graphcodebert-robust/checkpoint-1600/optimizer.pt +3 -0
  31. graphcodebert-robust/checkpoint-1600/rng_state.pth +3 -0
  32. graphcodebert-robust/checkpoint-1600/scaler.pt +3 -0
  33. graphcodebert-robust/checkpoint-1600/scheduler.pt +3 -0
  34. graphcodebert-robust/checkpoint-1600/special_tokens_map.json +51 -0
  35. graphcodebert-robust/checkpoint-1600/tokenizer.json +0 -0
  36. graphcodebert-robust/checkpoint-1600/tokenizer_config.json +58 -0
  37. graphcodebert-robust/checkpoint-1600/trainer_state.json +1173 -0
  38. graphcodebert-robust/checkpoint-1600/training_args.bin +3 -0
  39. graphcodebert-robust/checkpoint-1600/vocab.json +0 -0
  40. graphcodebert-robust/training.log +16 -16
graphcodebert-robust/checkpoint-1200/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "RobertaForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.2,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": 2,
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.2,
13
+ "hidden_size": 768,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_norm_eps": 1e-05,
17
+ "max_position_embeddings": 514,
18
+ "model_type": "roberta",
19
+ "num_attention_heads": 12,
20
+ "num_hidden_layers": 12,
21
+ "output_past": true,
22
+ "pad_token_id": 1,
23
+ "position_embedding_type": "absolute",
24
+ "transformers_version": "4.56.0",
25
+ "type_vocab_size": 1,
26
+ "use_cache": true,
27
+ "vocab_size": 50265
28
+ }
graphcodebert-robust/checkpoint-1200/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
graphcodebert-robust/checkpoint-1200/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:316b13a957824f0017c9b455c08c7153d6878c1567c1beb2139a3623f86dd99c
3
+ size 498612824
graphcodebert-robust/checkpoint-1200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66cb7b2f904ee3503c3d9a2cb6758777eac1279fac309d47fae32267dab8b3f6
3
+ size 4741923
graphcodebert-robust/checkpoint-1200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eaceda99e22a38324dd266a50443dbbb28981fc56530dd89c4d3d8da659af728
3
+ size 14581
graphcodebert-robust/checkpoint-1200/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:124625e167eb28acbfc793cfcb3e8a08b32e7fea06501462bc9e420a5e1beb2a
3
+ size 1383
graphcodebert-robust/checkpoint-1200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53459cc6ac514715c0a6bf016b14c38de84d51f78f8ffc1c0014ef91bdc5bf30
3
+ size 1465
graphcodebert-robust/checkpoint-1200/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
graphcodebert-robust/checkpoint-1200/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
graphcodebert-robust/checkpoint-1200/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50264": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": true,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": false,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "extra_special_tokens": {},
51
+ "mask_token": "<mask>",
52
+ "model_max_length": 512,
53
+ "pad_token": "<pad>",
54
+ "sep_token": "</s>",
55
+ "tokenizer_class": "RobertaTokenizer",
56
+ "trim_offsets": true,
57
+ "unk_token": "<unk>"
58
+ }
graphcodebert-robust/checkpoint-1200/trainer_state.json ADDED
@@ -0,0 +1,893 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1000,
3
+ "best_metric": 0.7549859375827388,
4
+ "best_model_checkpoint": "./output_checkpoints/graphcodebert-robust/checkpoint-1000",
5
+ "epoch": 0.0768,
6
+ "eval_steps": 1000,
7
+ "global_step": 1200,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.00064,
14
+ "grad_norm": 1.6144306659698486,
15
+ "learning_rate": 1.1520000000000002e-08,
16
+ "loss": 0.729,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.00128,
21
+ "grad_norm": 2.0952296257019043,
22
+ "learning_rate": 2.4320000000000002e-08,
23
+ "loss": 0.7295,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.00192,
28
+ "grad_norm": 1.3587689399719238,
29
+ "learning_rate": 3.7120000000000004e-08,
30
+ "loss": 0.73,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.00256,
35
+ "grad_norm": 1.2531732320785522,
36
+ "learning_rate": 4.9920000000000006e-08,
37
+ "loss": 0.7221,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.0032,
42
+ "grad_norm": 1.437932014465332,
43
+ "learning_rate": 6.272000000000001e-08,
44
+ "loss": 0.7209,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.00384,
49
+ "grad_norm": 1.418426752090454,
50
+ "learning_rate": 7.552e-08,
51
+ "loss": 0.729,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.00448,
56
+ "grad_norm": 1.9476298093795776,
57
+ "learning_rate": 8.832e-08,
58
+ "loss": 0.7242,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.00512,
63
+ "grad_norm": 1.7948051691055298,
64
+ "learning_rate": 1.0112000000000001e-07,
65
+ "loss": 0.7227,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 0.00576,
70
+ "grad_norm": 1.6534360647201538,
71
+ "learning_rate": 1.1392e-07,
72
+ "loss": 0.7234,
73
+ "step": 90
74
+ },
75
+ {
76
+ "epoch": 0.0064,
77
+ "grad_norm": 1.0920158624649048,
78
+ "learning_rate": 1.2672e-07,
79
+ "loss": 0.7328,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 0.00704,
84
+ "grad_norm": 1.977837085723877,
85
+ "learning_rate": 1.3952000000000002e-07,
86
+ "loss": 0.7263,
87
+ "step": 110
88
+ },
89
+ {
90
+ "epoch": 0.00768,
91
+ "grad_norm": 1.388983130455017,
92
+ "learning_rate": 1.5232000000000003e-07,
93
+ "loss": 0.7286,
94
+ "step": 120
95
+ },
96
+ {
97
+ "epoch": 0.00832,
98
+ "grad_norm": 1.2956682443618774,
99
+ "learning_rate": 1.6512e-07,
100
+ "loss": 0.7251,
101
+ "step": 130
102
+ },
103
+ {
104
+ "epoch": 0.00896,
105
+ "grad_norm": 1.8125052452087402,
106
+ "learning_rate": 1.7792e-07,
107
+ "loss": 0.7251,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 0.0096,
112
+ "grad_norm": 1.626846194267273,
113
+ "learning_rate": 1.9072e-07,
114
+ "loss": 0.727,
115
+ "step": 150
116
+ },
117
+ {
118
+ "epoch": 0.01024,
119
+ "grad_norm": 2.3243086338043213,
120
+ "learning_rate": 2.0352e-07,
121
+ "loss": 0.726,
122
+ "step": 160
123
+ },
124
+ {
125
+ "epoch": 0.01088,
126
+ "grad_norm": 1.4734737873077393,
127
+ "learning_rate": 2.1632e-07,
128
+ "loss": 0.7252,
129
+ "step": 170
130
+ },
131
+ {
132
+ "epoch": 0.01152,
133
+ "grad_norm": 2.090498685836792,
134
+ "learning_rate": 2.2912e-07,
135
+ "loss": 0.7273,
136
+ "step": 180
137
+ },
138
+ {
139
+ "epoch": 0.01216,
140
+ "grad_norm": 1.7563093900680542,
141
+ "learning_rate": 2.4192000000000004e-07,
142
+ "loss": 0.719,
143
+ "step": 190
144
+ },
145
+ {
146
+ "epoch": 0.0128,
147
+ "grad_norm": 1.449843168258667,
148
+ "learning_rate": 2.5472000000000005e-07,
149
+ "loss": 0.7237,
150
+ "step": 200
151
+ },
152
+ {
153
+ "epoch": 0.01344,
154
+ "grad_norm": 141396.296875,
155
+ "learning_rate": 5.350742447516642e-07,
156
+ "loss": 0.7217,
157
+ "step": 210
158
+ },
159
+ {
160
+ "epoch": 0.01408,
161
+ "grad_norm": 102339.1640625,
162
+ "learning_rate": 5.606758832565284e-07,
163
+ "loss": 0.7215,
164
+ "step": 220
165
+ },
166
+ {
167
+ "epoch": 0.01472,
168
+ "grad_norm": 134052.9375,
169
+ "learning_rate": 5.862775217613928e-07,
170
+ "loss": 0.7115,
171
+ "step": 230
172
+ },
173
+ {
174
+ "epoch": 0.01536,
175
+ "grad_norm": 87181.984375,
176
+ "learning_rate": 6.118791602662571e-07,
177
+ "loss": 0.7241,
178
+ "step": 240
179
+ },
180
+ {
181
+ "epoch": 0.016,
182
+ "grad_norm": 100231.328125,
183
+ "learning_rate": 6.374807987711214e-07,
184
+ "loss": 0.71,
185
+ "step": 250
186
+ },
187
+ {
188
+ "epoch": 0.01664,
189
+ "grad_norm": 136721.484375,
190
+ "learning_rate": 6.630824372759858e-07,
191
+ "loss": 0.7188,
192
+ "step": 260
193
+ },
194
+ {
195
+ "epoch": 0.01728,
196
+ "grad_norm": 115868.8125,
197
+ "learning_rate": 6.8868407578085e-07,
198
+ "loss": 0.7199,
199
+ "step": 270
200
+ },
201
+ {
202
+ "epoch": 0.01792,
203
+ "grad_norm": 70205.1484375,
204
+ "learning_rate": 7.142857142857143e-07,
205
+ "loss": 0.7299,
206
+ "step": 280
207
+ },
208
+ {
209
+ "epoch": 0.01856,
210
+ "grad_norm": 98926.4453125,
211
+ "learning_rate": 7.398873527905787e-07,
212
+ "loss": 0.7159,
213
+ "step": 290
214
+ },
215
+ {
216
+ "epoch": 0.0192,
217
+ "grad_norm": 134108.140625,
218
+ "learning_rate": 7.65488991295443e-07,
219
+ "loss": 0.7122,
220
+ "step": 300
221
+ },
222
+ {
223
+ "epoch": 0.01984,
224
+ "grad_norm": 103719.140625,
225
+ "learning_rate": 7.910906298003073e-07,
226
+ "loss": 0.7185,
227
+ "step": 310
228
+ },
229
+ {
230
+ "epoch": 0.02048,
231
+ "grad_norm": 85624.953125,
232
+ "learning_rate": 8.166922683051716e-07,
233
+ "loss": 0.718,
234
+ "step": 320
235
+ },
236
+ {
237
+ "epoch": 0.02112,
238
+ "grad_norm": 138824.15625,
239
+ "learning_rate": 8.422939068100359e-07,
240
+ "loss": 0.713,
241
+ "step": 330
242
+ },
243
+ {
244
+ "epoch": 0.02176,
245
+ "grad_norm": 73629.0859375,
246
+ "learning_rate": 8.678955453149002e-07,
247
+ "loss": 0.7186,
248
+ "step": 340
249
+ },
250
+ {
251
+ "epoch": 0.0224,
252
+ "grad_norm": 132493.0,
253
+ "learning_rate": 8.934971838197646e-07,
254
+ "loss": 0.7133,
255
+ "step": 350
256
+ },
257
+ {
258
+ "epoch": 0.02304,
259
+ "grad_norm": 85223.625,
260
+ "learning_rate": 9.190988223246289e-07,
261
+ "loss": 0.7124,
262
+ "step": 360
263
+ },
264
+ {
265
+ "epoch": 0.02368,
266
+ "grad_norm": 77868.78125,
267
+ "learning_rate": 9.447004608294931e-07,
268
+ "loss": 0.7058,
269
+ "step": 370
270
+ },
271
+ {
272
+ "epoch": 0.02432,
273
+ "grad_norm": 75874.3046875,
274
+ "learning_rate": 9.703020993343575e-07,
275
+ "loss": 0.7139,
276
+ "step": 380
277
+ },
278
+ {
279
+ "epoch": 0.02496,
280
+ "grad_norm": 151937.703125,
281
+ "learning_rate": 9.959037378392218e-07,
282
+ "loss": 0.713,
283
+ "step": 390
284
+ },
285
+ {
286
+ "epoch": 0.0256,
287
+ "grad_norm": 161711.671875,
288
+ "learning_rate": 1.021505376344086e-06,
289
+ "loss": 0.7137,
290
+ "step": 400
291
+ },
292
+ {
293
+ "epoch": 0.02624,
294
+ "grad_norm": 90800.234375,
295
+ "learning_rate": 1.0471070148489503e-06,
296
+ "loss": 0.7091,
297
+ "step": 410
298
+ },
299
+ {
300
+ "epoch": 0.02688,
301
+ "grad_norm": 82131.34375,
302
+ "learning_rate": 1.0727086533538148e-06,
303
+ "loss": 0.7098,
304
+ "step": 420
305
+ },
306
+ {
307
+ "epoch": 0.02752,
308
+ "grad_norm": 92818.9140625,
309
+ "learning_rate": 1.0983102918586791e-06,
310
+ "loss": 0.7099,
311
+ "step": 430
312
+ },
313
+ {
314
+ "epoch": 0.02816,
315
+ "grad_norm": 88555.5078125,
316
+ "learning_rate": 1.1239119303635434e-06,
317
+ "loss": 0.7086,
318
+ "step": 440
319
+ },
320
+ {
321
+ "epoch": 0.0288,
322
+ "grad_norm": 73428.6015625,
323
+ "learning_rate": 1.1495135688684077e-06,
324
+ "loss": 0.7117,
325
+ "step": 450
326
+ },
327
+ {
328
+ "epoch": 0.02944,
329
+ "grad_norm": 128938.7421875,
330
+ "learning_rate": 1.175115207373272e-06,
331
+ "loss": 0.7182,
332
+ "step": 460
333
+ },
334
+ {
335
+ "epoch": 0.03008,
336
+ "grad_norm": 102742.3359375,
337
+ "learning_rate": 1.2007168458781362e-06,
338
+ "loss": 0.7108,
339
+ "step": 470
340
+ },
341
+ {
342
+ "epoch": 0.03072,
343
+ "grad_norm": 73825.8125,
344
+ "learning_rate": 1.2263184843830007e-06,
345
+ "loss": 0.7087,
346
+ "step": 480
347
+ },
348
+ {
349
+ "epoch": 0.03136,
350
+ "grad_norm": 110930.75,
351
+ "learning_rate": 1.251920122887865e-06,
352
+ "loss": 0.7232,
353
+ "step": 490
354
+ },
355
+ {
356
+ "epoch": 0.032,
357
+ "grad_norm": 95068.84375,
358
+ "learning_rate": 1.2775217613927293e-06,
359
+ "loss": 0.703,
360
+ "step": 500
361
+ },
362
+ {
363
+ "epoch": 0.03264,
364
+ "grad_norm": 118731.9296875,
365
+ "learning_rate": 1.3031233998975938e-06,
366
+ "loss": 0.7063,
367
+ "step": 510
368
+ },
369
+ {
370
+ "epoch": 0.03328,
371
+ "grad_norm": 80511.828125,
372
+ "learning_rate": 1.3287250384024578e-06,
373
+ "loss": 0.7143,
374
+ "step": 520
375
+ },
376
+ {
377
+ "epoch": 0.03392,
378
+ "grad_norm": 84864.484375,
379
+ "learning_rate": 1.354326676907322e-06,
380
+ "loss": 0.7055,
381
+ "step": 530
382
+ },
383
+ {
384
+ "epoch": 0.03456,
385
+ "grad_norm": 107800.109375,
386
+ "learning_rate": 1.3799283154121864e-06,
387
+ "loss": 0.7119,
388
+ "step": 540
389
+ },
390
+ {
391
+ "epoch": 0.0352,
392
+ "grad_norm": 83667.671875,
393
+ "learning_rate": 1.4055299539170509e-06,
394
+ "loss": 0.7082,
395
+ "step": 550
396
+ },
397
+ {
398
+ "epoch": 0.03584,
399
+ "grad_norm": 75656.4140625,
400
+ "learning_rate": 1.4311315924219151e-06,
401
+ "loss": 0.7062,
402
+ "step": 560
403
+ },
404
+ {
405
+ "epoch": 0.03648,
406
+ "grad_norm": 79985.875,
407
+ "learning_rate": 1.4567332309267796e-06,
408
+ "loss": 0.7155,
409
+ "step": 570
410
+ },
411
+ {
412
+ "epoch": 0.03712,
413
+ "grad_norm": 76334.078125,
414
+ "learning_rate": 1.4823348694316437e-06,
415
+ "loss": 0.7075,
416
+ "step": 580
417
+ },
418
+ {
419
+ "epoch": 0.03776,
420
+ "grad_norm": 140764.03125,
421
+ "learning_rate": 1.507936507936508e-06,
422
+ "loss": 0.7065,
423
+ "step": 590
424
+ },
425
+ {
426
+ "epoch": 0.0384,
427
+ "grad_norm": 100877.296875,
428
+ "learning_rate": 1.5335381464413722e-06,
429
+ "loss": 0.7096,
430
+ "step": 600
431
+ },
432
+ {
433
+ "epoch": 0.03904,
434
+ "grad_norm": 104088.1171875,
435
+ "learning_rate": 1.5591397849462367e-06,
436
+ "loss": 0.6987,
437
+ "step": 610
438
+ },
439
+ {
440
+ "epoch": 0.03968,
441
+ "grad_norm": 80806.2265625,
442
+ "learning_rate": 1.584741423451101e-06,
443
+ "loss": 0.707,
444
+ "step": 620
445
+ },
446
+ {
447
+ "epoch": 0.04032,
448
+ "grad_norm": 109884.765625,
449
+ "learning_rate": 1.6103430619559655e-06,
450
+ "loss": 0.6991,
451
+ "step": 630
452
+ },
453
+ {
454
+ "epoch": 0.04096,
455
+ "grad_norm": 79944.890625,
456
+ "learning_rate": 1.6359447004608298e-06,
457
+ "loss": 0.7047,
458
+ "step": 640
459
+ },
460
+ {
461
+ "epoch": 0.0416,
462
+ "grad_norm": 93673.3828125,
463
+ "learning_rate": 1.6615463389656938e-06,
464
+ "loss": 0.6971,
465
+ "step": 650
466
+ },
467
+ {
468
+ "epoch": 0.04224,
469
+ "grad_norm": 76641.265625,
470
+ "learning_rate": 1.6871479774705581e-06,
471
+ "loss": 0.6957,
472
+ "step": 660
473
+ },
474
+ {
475
+ "epoch": 0.04288,
476
+ "grad_norm": 73583.5546875,
477
+ "learning_rate": 1.7127496159754226e-06,
478
+ "loss": 0.7028,
479
+ "step": 670
480
+ },
481
+ {
482
+ "epoch": 0.04352,
483
+ "grad_norm": 75177.9609375,
484
+ "learning_rate": 1.7383512544802869e-06,
485
+ "loss": 0.7012,
486
+ "step": 680
487
+ },
488
+ {
489
+ "epoch": 0.04416,
490
+ "grad_norm": 78340.8515625,
491
+ "learning_rate": 1.7639528929851512e-06,
492
+ "loss": 0.6987,
493
+ "step": 690
494
+ },
495
+ {
496
+ "epoch": 0.0448,
497
+ "grad_norm": 86004.1171875,
498
+ "learning_rate": 1.7895545314900157e-06,
499
+ "loss": 0.7061,
500
+ "step": 700
501
+ },
502
+ {
503
+ "epoch": 0.04544,
504
+ "grad_norm": 94212.0390625,
505
+ "learning_rate": 1.8151561699948797e-06,
506
+ "loss": 0.6993,
507
+ "step": 710
508
+ },
509
+ {
510
+ "epoch": 0.04608,
511
+ "grad_norm": 83918.2421875,
512
+ "learning_rate": 1.840757808499744e-06,
513
+ "loss": 0.7009,
514
+ "step": 720
515
+ },
516
+ {
517
+ "epoch": 0.04672,
518
+ "grad_norm": 68374.3125,
519
+ "learning_rate": 1.8663594470046085e-06,
520
+ "loss": 0.6964,
521
+ "step": 730
522
+ },
523
+ {
524
+ "epoch": 0.04736,
525
+ "grad_norm": 90348.78125,
526
+ "learning_rate": 1.8919610855094728e-06,
527
+ "loss": 0.7011,
528
+ "step": 740
529
+ },
530
+ {
531
+ "epoch": 0.048,
532
+ "grad_norm": 146658.0,
533
+ "learning_rate": 1.9175627240143373e-06,
534
+ "loss": 0.7003,
535
+ "step": 750
536
+ },
537
+ {
538
+ "epoch": 0.04864,
539
+ "grad_norm": 112037.1640625,
540
+ "learning_rate": 1.9431643625192015e-06,
541
+ "loss": 0.7051,
542
+ "step": 760
543
+ },
544
+ {
545
+ "epoch": 0.04928,
546
+ "grad_norm": 70628.625,
547
+ "learning_rate": 1.9687660010240654e-06,
548
+ "loss": 0.6923,
549
+ "step": 770
550
+ },
551
+ {
552
+ "epoch": 0.04992,
553
+ "grad_norm": 109922.125,
554
+ "learning_rate": 1.99436763952893e-06,
555
+ "loss": 0.6893,
556
+ "step": 780
557
+ },
558
+ {
559
+ "epoch": 0.05056,
560
+ "grad_norm": 135306.375,
561
+ "learning_rate": 2.0199692780337944e-06,
562
+ "loss": 0.7008,
563
+ "step": 790
564
+ },
565
+ {
566
+ "epoch": 0.0512,
567
+ "grad_norm": 82354.8046875,
568
+ "learning_rate": 2.0455709165386586e-06,
569
+ "loss": 0.705,
570
+ "step": 800
571
+ },
572
+ {
573
+ "epoch": 0.05184,
574
+ "grad_norm": 95951.671875,
575
+ "learning_rate": 2.071172555043523e-06,
576
+ "loss": 0.6912,
577
+ "step": 810
578
+ },
579
+ {
580
+ "epoch": 0.05248,
581
+ "grad_norm": 96797.4609375,
582
+ "learning_rate": 2.096774193548387e-06,
583
+ "loss": 0.6922,
584
+ "step": 820
585
+ },
586
+ {
587
+ "epoch": 0.05312,
588
+ "grad_norm": 87190.625,
589
+ "learning_rate": 2.122375832053252e-06,
590
+ "loss": 0.6946,
591
+ "step": 830
592
+ },
593
+ {
594
+ "epoch": 0.05376,
595
+ "grad_norm": 87958.5625,
596
+ "learning_rate": 2.1479774705581158e-06,
597
+ "loss": 0.6949,
598
+ "step": 840
599
+ },
600
+ {
601
+ "epoch": 0.0544,
602
+ "grad_norm": 77217.1796875,
603
+ "learning_rate": 2.17357910906298e-06,
604
+ "loss": 0.6928,
605
+ "step": 850
606
+ },
607
+ {
608
+ "epoch": 0.05504,
609
+ "grad_norm": 117156.5546875,
610
+ "learning_rate": 2.1991807475678443e-06,
611
+ "loss": 0.692,
612
+ "step": 860
613
+ },
614
+ {
615
+ "epoch": 0.05568,
616
+ "grad_norm": 94618.6875,
617
+ "learning_rate": 2.224782386072709e-06,
618
+ "loss": 0.6976,
619
+ "step": 870
620
+ },
621
+ {
622
+ "epoch": 0.05632,
623
+ "grad_norm": 71444.6484375,
624
+ "learning_rate": 2.2503840245775733e-06,
625
+ "loss": 0.6989,
626
+ "step": 880
627
+ },
628
+ {
629
+ "epoch": 0.05696,
630
+ "grad_norm": 159991.609375,
631
+ "learning_rate": 2.2759856630824376e-06,
632
+ "loss": 0.6928,
633
+ "step": 890
634
+ },
635
+ {
636
+ "epoch": 0.0576,
637
+ "grad_norm": 81899.6875,
638
+ "learning_rate": 2.301587301587302e-06,
639
+ "loss": 0.691,
640
+ "step": 900
641
+ },
642
+ {
643
+ "epoch": 0.05824,
644
+ "grad_norm": 110817.3671875,
645
+ "learning_rate": 2.327188940092166e-06,
646
+ "loss": 0.6858,
647
+ "step": 910
648
+ },
649
+ {
650
+ "epoch": 0.05888,
651
+ "grad_norm": 105698.109375,
652
+ "learning_rate": 2.3527905785970304e-06,
653
+ "loss": 0.6965,
654
+ "step": 920
655
+ },
656
+ {
657
+ "epoch": 0.05952,
658
+ "grad_norm": 76475.0,
659
+ "learning_rate": 2.3783922171018947e-06,
660
+ "loss": 0.6901,
661
+ "step": 930
662
+ },
663
+ {
664
+ "epoch": 0.06016,
665
+ "grad_norm": 96672.6796875,
666
+ "learning_rate": 2.403993855606759e-06,
667
+ "loss": 0.6908,
668
+ "step": 940
669
+ },
670
+ {
671
+ "epoch": 0.0608,
672
+ "grad_norm": 114510.8125,
673
+ "learning_rate": 2.4295954941116232e-06,
674
+ "loss": 0.6904,
675
+ "step": 950
676
+ },
677
+ {
678
+ "epoch": 0.06144,
679
+ "grad_norm": 62412.4375,
680
+ "learning_rate": 2.455197132616488e-06,
681
+ "loss": 0.6855,
682
+ "step": 960
683
+ },
684
+ {
685
+ "epoch": 0.06208,
686
+ "grad_norm": 92860.7109375,
687
+ "learning_rate": 2.4807987711213518e-06,
688
+ "loss": 0.6752,
689
+ "step": 970
690
+ },
691
+ {
692
+ "epoch": 0.06272,
693
+ "grad_norm": 75184.359375,
694
+ "learning_rate": 2.506400409626216e-06,
695
+ "loss": 0.6868,
696
+ "step": 980
697
+ },
698
+ {
699
+ "epoch": 0.06336,
700
+ "grad_norm": 77771.1640625,
701
+ "learning_rate": 2.5320020481310808e-06,
702
+ "loss": 0.6941,
703
+ "step": 990
704
+ },
705
+ {
706
+ "epoch": 0.064,
707
+ "grad_norm": 65366.796875,
708
+ "learning_rate": 2.557603686635945e-06,
709
+ "loss": 0.6808,
710
+ "step": 1000
711
+ },
712
+ {
713
+ "epoch": 0.064,
714
+ "eval_accuracy": 0.75744,
715
+ "eval_loss": 0.6539617776870728,
716
+ "eval_macro_f1": 0.7549859375827388,
717
+ "eval_runtime": 1576.6702,
718
+ "eval_samples_per_second": 63.425,
719
+ "eval_steps_per_second": 0.496,
720
+ "step": 1000
721
+ },
722
+ {
723
+ "epoch": 0.06464,
724
+ "grad_norm": 73310.6171875,
725
+ "learning_rate": 2.583205325140809e-06,
726
+ "loss": 0.6866,
727
+ "step": 1010
728
+ },
729
+ {
730
+ "epoch": 0.06528,
731
+ "grad_norm": 80602.859375,
732
+ "learning_rate": 2.6088069636456736e-06,
733
+ "loss": 0.6873,
734
+ "step": 1020
735
+ },
736
+ {
737
+ "epoch": 0.06592,
738
+ "grad_norm": 121537.0234375,
739
+ "learning_rate": 2.634408602150538e-06,
740
+ "loss": 0.6806,
741
+ "step": 1030
742
+ },
743
+ {
744
+ "epoch": 0.06656,
745
+ "grad_norm": 105537.46875,
746
+ "learning_rate": 2.6600102406554026e-06,
747
+ "loss": 0.6835,
748
+ "step": 1040
749
+ },
750
+ {
751
+ "epoch": 0.0672,
752
+ "grad_norm": 188847.71875,
753
+ "learning_rate": 2.6856118791602664e-06,
754
+ "loss": 0.687,
755
+ "step": 1050
756
+ },
757
+ {
758
+ "epoch": 0.06784,
759
+ "grad_norm": 73677.8359375,
760
+ "learning_rate": 2.7112135176651307e-06,
761
+ "loss": 0.6848,
762
+ "step": 1060
763
+ },
764
+ {
765
+ "epoch": 0.06848,
766
+ "grad_norm": 72158.984375,
767
+ "learning_rate": 2.736815156169995e-06,
768
+ "loss": 0.6833,
769
+ "step": 1070
770
+ },
771
+ {
772
+ "epoch": 0.06912,
773
+ "grad_norm": 73585.1015625,
774
+ "learning_rate": 2.7624167946748593e-06,
775
+ "loss": 0.6766,
776
+ "step": 1080
777
+ },
778
+ {
779
+ "epoch": 0.06976,
780
+ "grad_norm": 78721.0390625,
781
+ "learning_rate": 2.788018433179724e-06,
782
+ "loss": 0.6796,
783
+ "step": 1090
784
+ },
785
+ {
786
+ "epoch": 0.0704,
787
+ "grad_norm": 72044.3515625,
788
+ "learning_rate": 2.813620071684588e-06,
789
+ "loss": 0.673,
790
+ "step": 1100
791
+ },
792
+ {
793
+ "epoch": 0.07104,
794
+ "grad_norm": 100059.984375,
795
+ "learning_rate": 2.8392217101894525e-06,
796
+ "loss": 0.6783,
797
+ "step": 1110
798
+ },
799
+ {
800
+ "epoch": 0.07168,
801
+ "grad_norm": 92175.921875,
802
+ "learning_rate": 2.864823348694317e-06,
803
+ "loss": 0.6901,
804
+ "step": 1120
805
+ },
806
+ {
807
+ "epoch": 0.07232,
808
+ "grad_norm": 86143.453125,
809
+ "learning_rate": 2.8904249871991806e-06,
810
+ "loss": 0.6769,
811
+ "step": 1130
812
+ },
813
+ {
814
+ "epoch": 0.07296,
815
+ "grad_norm": 101410.171875,
816
+ "learning_rate": 2.9160266257040453e-06,
817
+ "loss": 0.6781,
818
+ "step": 1140
819
+ },
820
+ {
821
+ "epoch": 0.0736,
822
+ "grad_norm": 67173.296875,
823
+ "learning_rate": 2.9416282642089096e-06,
824
+ "loss": 0.6737,
825
+ "step": 1150
826
+ },
827
+ {
828
+ "epoch": 0.07424,
829
+ "grad_norm": 100701.8203125,
830
+ "learning_rate": 2.967229902713774e-06,
831
+ "loss": 0.6741,
832
+ "step": 1160
833
+ },
834
+ {
835
+ "epoch": 0.07488,
836
+ "grad_norm": 75457.328125,
837
+ "learning_rate": 2.992831541218638e-06,
838
+ "loss": 0.6742,
839
+ "step": 1170
840
+ },
841
+ {
842
+ "epoch": 0.07552,
843
+ "grad_norm": 97755.9921875,
844
+ "learning_rate": 3.018433179723503e-06,
845
+ "loss": 0.6845,
846
+ "step": 1180
847
+ },
848
+ {
849
+ "epoch": 0.07616,
850
+ "grad_norm": 53426.1171875,
851
+ "learning_rate": 3.0440348182283667e-06,
852
+ "loss": 0.6718,
853
+ "step": 1190
854
+ },
855
+ {
856
+ "epoch": 0.0768,
857
+ "grad_norm": 71654.625,
858
+ "learning_rate": 3.069636456733231e-06,
859
+ "loss": 0.6798,
860
+ "step": 1200
861
+ }
862
+ ],
863
+ "logging_steps": 10,
864
+ "max_steps": 78125,
865
+ "num_input_tokens_seen": 0,
866
+ "num_train_epochs": 5,
867
+ "save_steps": 200,
868
+ "stateful_callbacks": {
869
+ "EarlyStoppingCallback": {
870
+ "args": {
871
+ "early_stopping_patience": 3,
872
+ "early_stopping_threshold": 0.0
873
+ },
874
+ "attributes": {
875
+ "early_stopping_patience_counter": 0
876
+ }
877
+ },
878
+ "TrainerControl": {
879
+ "args": {
880
+ "should_epoch_stop": false,
881
+ "should_evaluate": false,
882
+ "should_log": false,
883
+ "should_save": true,
884
+ "should_training_stop": false
885
+ },
886
+ "attributes": {}
887
+ }
888
+ },
889
+ "total_flos": 1.010052097089216e+16,
890
+ "train_batch_size": 32,
891
+ "trial_name": null,
892
+ "trial_params": null
893
+ }
graphcodebert-robust/checkpoint-1200/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64c0b8a6d392bd2e7b64d7504ce486a51b83e4079e79341886020b90ee199ffd
3
+ size 5841
graphcodebert-robust/checkpoint-1200/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
graphcodebert-robust/checkpoint-1400/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "RobertaForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.2,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": 2,
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.2,
13
+ "hidden_size": 768,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_norm_eps": 1e-05,
17
+ "max_position_embeddings": 514,
18
+ "model_type": "roberta",
19
+ "num_attention_heads": 12,
20
+ "num_hidden_layers": 12,
21
+ "output_past": true,
22
+ "pad_token_id": 1,
23
+ "position_embedding_type": "absolute",
24
+ "transformers_version": "4.56.0",
25
+ "type_vocab_size": 1,
26
+ "use_cache": true,
27
+ "vocab_size": 50265
28
+ }
graphcodebert-robust/checkpoint-1400/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
graphcodebert-robust/checkpoint-1400/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a8a0219e43cfa3944fd703c1f03513edd34ea0759e8cfa860a3745dd8193b74
3
+ size 498612824
graphcodebert-robust/checkpoint-1400/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8608dd58fb85cad2a9391071270bcff3f15e3dbe021243f0958dae390c241351
3
+ size 4741923
graphcodebert-robust/checkpoint-1400/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f2a8f29bbe7f3f25e78d6ee9dda5373e833b88a3ed31e47c81d28dd8e2b5d46
3
+ size 14581
graphcodebert-robust/checkpoint-1400/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:124625e167eb28acbfc793cfcb3e8a08b32e7fea06501462bc9e420a5e1beb2a
3
+ size 1383
graphcodebert-robust/checkpoint-1400/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2d436dc220f12ae5324774310bba197fb7b5785ee94cafdd5c89c7de116215c
3
+ size 1465
graphcodebert-robust/checkpoint-1400/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
graphcodebert-robust/checkpoint-1400/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
graphcodebert-robust/checkpoint-1400/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50264": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": true,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": false,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "extra_special_tokens": {},
51
+ "mask_token": "<mask>",
52
+ "model_max_length": 512,
53
+ "pad_token": "<pad>",
54
+ "sep_token": "</s>",
55
+ "tokenizer_class": "RobertaTokenizer",
56
+ "trim_offsets": true,
57
+ "unk_token": "<unk>"
58
+ }
graphcodebert-robust/checkpoint-1400/trainer_state.json ADDED
@@ -0,0 +1,1033 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1000,
3
+ "best_metric": 0.7549859375827388,
4
+ "best_model_checkpoint": "./output_checkpoints/graphcodebert-robust/checkpoint-1000",
5
+ "epoch": 0.0896,
6
+ "eval_steps": 1000,
7
+ "global_step": 1400,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.00064,
14
+ "grad_norm": 1.6144306659698486,
15
+ "learning_rate": 1.1520000000000002e-08,
16
+ "loss": 0.729,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.00128,
21
+ "grad_norm": 2.0952296257019043,
22
+ "learning_rate": 2.4320000000000002e-08,
23
+ "loss": 0.7295,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.00192,
28
+ "grad_norm": 1.3587689399719238,
29
+ "learning_rate": 3.7120000000000004e-08,
30
+ "loss": 0.73,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.00256,
35
+ "grad_norm": 1.2531732320785522,
36
+ "learning_rate": 4.9920000000000006e-08,
37
+ "loss": 0.7221,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.0032,
42
+ "grad_norm": 1.437932014465332,
43
+ "learning_rate": 6.272000000000001e-08,
44
+ "loss": 0.7209,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.00384,
49
+ "grad_norm": 1.418426752090454,
50
+ "learning_rate": 7.552e-08,
51
+ "loss": 0.729,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.00448,
56
+ "grad_norm": 1.9476298093795776,
57
+ "learning_rate": 8.832e-08,
58
+ "loss": 0.7242,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.00512,
63
+ "grad_norm": 1.7948051691055298,
64
+ "learning_rate": 1.0112000000000001e-07,
65
+ "loss": 0.7227,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 0.00576,
70
+ "grad_norm": 1.6534360647201538,
71
+ "learning_rate": 1.1392e-07,
72
+ "loss": 0.7234,
73
+ "step": 90
74
+ },
75
+ {
76
+ "epoch": 0.0064,
77
+ "grad_norm": 1.0920158624649048,
78
+ "learning_rate": 1.2672e-07,
79
+ "loss": 0.7328,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 0.00704,
84
+ "grad_norm": 1.977837085723877,
85
+ "learning_rate": 1.3952000000000002e-07,
86
+ "loss": 0.7263,
87
+ "step": 110
88
+ },
89
+ {
90
+ "epoch": 0.00768,
91
+ "grad_norm": 1.388983130455017,
92
+ "learning_rate": 1.5232000000000003e-07,
93
+ "loss": 0.7286,
94
+ "step": 120
95
+ },
96
+ {
97
+ "epoch": 0.00832,
98
+ "grad_norm": 1.2956682443618774,
99
+ "learning_rate": 1.6512e-07,
100
+ "loss": 0.7251,
101
+ "step": 130
102
+ },
103
+ {
104
+ "epoch": 0.00896,
105
+ "grad_norm": 1.8125052452087402,
106
+ "learning_rate": 1.7792e-07,
107
+ "loss": 0.7251,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 0.0096,
112
+ "grad_norm": 1.626846194267273,
113
+ "learning_rate": 1.9072e-07,
114
+ "loss": 0.727,
115
+ "step": 150
116
+ },
117
+ {
118
+ "epoch": 0.01024,
119
+ "grad_norm": 2.3243086338043213,
120
+ "learning_rate": 2.0352e-07,
121
+ "loss": 0.726,
122
+ "step": 160
123
+ },
124
+ {
125
+ "epoch": 0.01088,
126
+ "grad_norm": 1.4734737873077393,
127
+ "learning_rate": 2.1632e-07,
128
+ "loss": 0.7252,
129
+ "step": 170
130
+ },
131
+ {
132
+ "epoch": 0.01152,
133
+ "grad_norm": 2.090498685836792,
134
+ "learning_rate": 2.2912e-07,
135
+ "loss": 0.7273,
136
+ "step": 180
137
+ },
138
+ {
139
+ "epoch": 0.01216,
140
+ "grad_norm": 1.7563093900680542,
141
+ "learning_rate": 2.4192000000000004e-07,
142
+ "loss": 0.719,
143
+ "step": 190
144
+ },
145
+ {
146
+ "epoch": 0.0128,
147
+ "grad_norm": 1.449843168258667,
148
+ "learning_rate": 2.5472000000000005e-07,
149
+ "loss": 0.7237,
150
+ "step": 200
151
+ },
152
+ {
153
+ "epoch": 0.01344,
154
+ "grad_norm": 141396.296875,
155
+ "learning_rate": 5.350742447516642e-07,
156
+ "loss": 0.7217,
157
+ "step": 210
158
+ },
159
+ {
160
+ "epoch": 0.01408,
161
+ "grad_norm": 102339.1640625,
162
+ "learning_rate": 5.606758832565284e-07,
163
+ "loss": 0.7215,
164
+ "step": 220
165
+ },
166
+ {
167
+ "epoch": 0.01472,
168
+ "grad_norm": 134052.9375,
169
+ "learning_rate": 5.862775217613928e-07,
170
+ "loss": 0.7115,
171
+ "step": 230
172
+ },
173
+ {
174
+ "epoch": 0.01536,
175
+ "grad_norm": 87181.984375,
176
+ "learning_rate": 6.118791602662571e-07,
177
+ "loss": 0.7241,
178
+ "step": 240
179
+ },
180
+ {
181
+ "epoch": 0.016,
182
+ "grad_norm": 100231.328125,
183
+ "learning_rate": 6.374807987711214e-07,
184
+ "loss": 0.71,
185
+ "step": 250
186
+ },
187
+ {
188
+ "epoch": 0.01664,
189
+ "grad_norm": 136721.484375,
190
+ "learning_rate": 6.630824372759858e-07,
191
+ "loss": 0.7188,
192
+ "step": 260
193
+ },
194
+ {
195
+ "epoch": 0.01728,
196
+ "grad_norm": 115868.8125,
197
+ "learning_rate": 6.8868407578085e-07,
198
+ "loss": 0.7199,
199
+ "step": 270
200
+ },
201
+ {
202
+ "epoch": 0.01792,
203
+ "grad_norm": 70205.1484375,
204
+ "learning_rate": 7.142857142857143e-07,
205
+ "loss": 0.7299,
206
+ "step": 280
207
+ },
208
+ {
209
+ "epoch": 0.01856,
210
+ "grad_norm": 98926.4453125,
211
+ "learning_rate": 7.398873527905787e-07,
212
+ "loss": 0.7159,
213
+ "step": 290
214
+ },
215
+ {
216
+ "epoch": 0.0192,
217
+ "grad_norm": 134108.140625,
218
+ "learning_rate": 7.65488991295443e-07,
219
+ "loss": 0.7122,
220
+ "step": 300
221
+ },
222
+ {
223
+ "epoch": 0.01984,
224
+ "grad_norm": 103719.140625,
225
+ "learning_rate": 7.910906298003073e-07,
226
+ "loss": 0.7185,
227
+ "step": 310
228
+ },
229
+ {
230
+ "epoch": 0.02048,
231
+ "grad_norm": 85624.953125,
232
+ "learning_rate": 8.166922683051716e-07,
233
+ "loss": 0.718,
234
+ "step": 320
235
+ },
236
+ {
237
+ "epoch": 0.02112,
238
+ "grad_norm": 138824.15625,
239
+ "learning_rate": 8.422939068100359e-07,
240
+ "loss": 0.713,
241
+ "step": 330
242
+ },
243
+ {
244
+ "epoch": 0.02176,
245
+ "grad_norm": 73629.0859375,
246
+ "learning_rate": 8.678955453149002e-07,
247
+ "loss": 0.7186,
248
+ "step": 340
249
+ },
250
+ {
251
+ "epoch": 0.0224,
252
+ "grad_norm": 132493.0,
253
+ "learning_rate": 8.934971838197646e-07,
254
+ "loss": 0.7133,
255
+ "step": 350
256
+ },
257
+ {
258
+ "epoch": 0.02304,
259
+ "grad_norm": 85223.625,
260
+ "learning_rate": 9.190988223246289e-07,
261
+ "loss": 0.7124,
262
+ "step": 360
263
+ },
264
+ {
265
+ "epoch": 0.02368,
266
+ "grad_norm": 77868.78125,
267
+ "learning_rate": 9.447004608294931e-07,
268
+ "loss": 0.7058,
269
+ "step": 370
270
+ },
271
+ {
272
+ "epoch": 0.02432,
273
+ "grad_norm": 75874.3046875,
274
+ "learning_rate": 9.703020993343575e-07,
275
+ "loss": 0.7139,
276
+ "step": 380
277
+ },
278
+ {
279
+ "epoch": 0.02496,
280
+ "grad_norm": 151937.703125,
281
+ "learning_rate": 9.959037378392218e-07,
282
+ "loss": 0.713,
283
+ "step": 390
284
+ },
285
+ {
286
+ "epoch": 0.0256,
287
+ "grad_norm": 161711.671875,
288
+ "learning_rate": 1.021505376344086e-06,
289
+ "loss": 0.7137,
290
+ "step": 400
291
+ },
292
+ {
293
+ "epoch": 0.02624,
294
+ "grad_norm": 90800.234375,
295
+ "learning_rate": 1.0471070148489503e-06,
296
+ "loss": 0.7091,
297
+ "step": 410
298
+ },
299
+ {
300
+ "epoch": 0.02688,
301
+ "grad_norm": 82131.34375,
302
+ "learning_rate": 1.0727086533538148e-06,
303
+ "loss": 0.7098,
304
+ "step": 420
305
+ },
306
+ {
307
+ "epoch": 0.02752,
308
+ "grad_norm": 92818.9140625,
309
+ "learning_rate": 1.0983102918586791e-06,
310
+ "loss": 0.7099,
311
+ "step": 430
312
+ },
313
+ {
314
+ "epoch": 0.02816,
315
+ "grad_norm": 88555.5078125,
316
+ "learning_rate": 1.1239119303635434e-06,
317
+ "loss": 0.7086,
318
+ "step": 440
319
+ },
320
+ {
321
+ "epoch": 0.0288,
322
+ "grad_norm": 73428.6015625,
323
+ "learning_rate": 1.1495135688684077e-06,
324
+ "loss": 0.7117,
325
+ "step": 450
326
+ },
327
+ {
328
+ "epoch": 0.02944,
329
+ "grad_norm": 128938.7421875,
330
+ "learning_rate": 1.175115207373272e-06,
331
+ "loss": 0.7182,
332
+ "step": 460
333
+ },
334
+ {
335
+ "epoch": 0.03008,
336
+ "grad_norm": 102742.3359375,
337
+ "learning_rate": 1.2007168458781362e-06,
338
+ "loss": 0.7108,
339
+ "step": 470
340
+ },
341
+ {
342
+ "epoch": 0.03072,
343
+ "grad_norm": 73825.8125,
344
+ "learning_rate": 1.2263184843830007e-06,
345
+ "loss": 0.7087,
346
+ "step": 480
347
+ },
348
+ {
349
+ "epoch": 0.03136,
350
+ "grad_norm": 110930.75,
351
+ "learning_rate": 1.251920122887865e-06,
352
+ "loss": 0.7232,
353
+ "step": 490
354
+ },
355
+ {
356
+ "epoch": 0.032,
357
+ "grad_norm": 95068.84375,
358
+ "learning_rate": 1.2775217613927293e-06,
359
+ "loss": 0.703,
360
+ "step": 500
361
+ },
362
+ {
363
+ "epoch": 0.03264,
364
+ "grad_norm": 118731.9296875,
365
+ "learning_rate": 1.3031233998975938e-06,
366
+ "loss": 0.7063,
367
+ "step": 510
368
+ },
369
+ {
370
+ "epoch": 0.03328,
371
+ "grad_norm": 80511.828125,
372
+ "learning_rate": 1.3287250384024578e-06,
373
+ "loss": 0.7143,
374
+ "step": 520
375
+ },
376
+ {
377
+ "epoch": 0.03392,
378
+ "grad_norm": 84864.484375,
379
+ "learning_rate": 1.354326676907322e-06,
380
+ "loss": 0.7055,
381
+ "step": 530
382
+ },
383
+ {
384
+ "epoch": 0.03456,
385
+ "grad_norm": 107800.109375,
386
+ "learning_rate": 1.3799283154121864e-06,
387
+ "loss": 0.7119,
388
+ "step": 540
389
+ },
390
+ {
391
+ "epoch": 0.0352,
392
+ "grad_norm": 83667.671875,
393
+ "learning_rate": 1.4055299539170509e-06,
394
+ "loss": 0.7082,
395
+ "step": 550
396
+ },
397
+ {
398
+ "epoch": 0.03584,
399
+ "grad_norm": 75656.4140625,
400
+ "learning_rate": 1.4311315924219151e-06,
401
+ "loss": 0.7062,
402
+ "step": 560
403
+ },
404
+ {
405
+ "epoch": 0.03648,
406
+ "grad_norm": 79985.875,
407
+ "learning_rate": 1.4567332309267796e-06,
408
+ "loss": 0.7155,
409
+ "step": 570
410
+ },
411
+ {
412
+ "epoch": 0.03712,
413
+ "grad_norm": 76334.078125,
414
+ "learning_rate": 1.4823348694316437e-06,
415
+ "loss": 0.7075,
416
+ "step": 580
417
+ },
418
+ {
419
+ "epoch": 0.03776,
420
+ "grad_norm": 140764.03125,
421
+ "learning_rate": 1.507936507936508e-06,
422
+ "loss": 0.7065,
423
+ "step": 590
424
+ },
425
+ {
426
+ "epoch": 0.0384,
427
+ "grad_norm": 100877.296875,
428
+ "learning_rate": 1.5335381464413722e-06,
429
+ "loss": 0.7096,
430
+ "step": 600
431
+ },
432
+ {
433
+ "epoch": 0.03904,
434
+ "grad_norm": 104088.1171875,
435
+ "learning_rate": 1.5591397849462367e-06,
436
+ "loss": 0.6987,
437
+ "step": 610
438
+ },
439
+ {
440
+ "epoch": 0.03968,
441
+ "grad_norm": 80806.2265625,
442
+ "learning_rate": 1.584741423451101e-06,
443
+ "loss": 0.707,
444
+ "step": 620
445
+ },
446
+ {
447
+ "epoch": 0.04032,
448
+ "grad_norm": 109884.765625,
449
+ "learning_rate": 1.6103430619559655e-06,
450
+ "loss": 0.6991,
451
+ "step": 630
452
+ },
453
+ {
454
+ "epoch": 0.04096,
455
+ "grad_norm": 79944.890625,
456
+ "learning_rate": 1.6359447004608298e-06,
457
+ "loss": 0.7047,
458
+ "step": 640
459
+ },
460
+ {
461
+ "epoch": 0.0416,
462
+ "grad_norm": 93673.3828125,
463
+ "learning_rate": 1.6615463389656938e-06,
464
+ "loss": 0.6971,
465
+ "step": 650
466
+ },
467
+ {
468
+ "epoch": 0.04224,
469
+ "grad_norm": 76641.265625,
470
+ "learning_rate": 1.6871479774705581e-06,
471
+ "loss": 0.6957,
472
+ "step": 660
473
+ },
474
+ {
475
+ "epoch": 0.04288,
476
+ "grad_norm": 73583.5546875,
477
+ "learning_rate": 1.7127496159754226e-06,
478
+ "loss": 0.7028,
479
+ "step": 670
480
+ },
481
+ {
482
+ "epoch": 0.04352,
483
+ "grad_norm": 75177.9609375,
484
+ "learning_rate": 1.7383512544802869e-06,
485
+ "loss": 0.7012,
486
+ "step": 680
487
+ },
488
+ {
489
+ "epoch": 0.04416,
490
+ "grad_norm": 78340.8515625,
491
+ "learning_rate": 1.7639528929851512e-06,
492
+ "loss": 0.6987,
493
+ "step": 690
494
+ },
495
+ {
496
+ "epoch": 0.0448,
497
+ "grad_norm": 86004.1171875,
498
+ "learning_rate": 1.7895545314900157e-06,
499
+ "loss": 0.7061,
500
+ "step": 700
501
+ },
502
+ {
503
+ "epoch": 0.04544,
504
+ "grad_norm": 94212.0390625,
505
+ "learning_rate": 1.8151561699948797e-06,
506
+ "loss": 0.6993,
507
+ "step": 710
508
+ },
509
+ {
510
+ "epoch": 0.04608,
511
+ "grad_norm": 83918.2421875,
512
+ "learning_rate": 1.840757808499744e-06,
513
+ "loss": 0.7009,
514
+ "step": 720
515
+ },
516
+ {
517
+ "epoch": 0.04672,
518
+ "grad_norm": 68374.3125,
519
+ "learning_rate": 1.8663594470046085e-06,
520
+ "loss": 0.6964,
521
+ "step": 730
522
+ },
523
+ {
524
+ "epoch": 0.04736,
525
+ "grad_norm": 90348.78125,
526
+ "learning_rate": 1.8919610855094728e-06,
527
+ "loss": 0.7011,
528
+ "step": 740
529
+ },
530
+ {
531
+ "epoch": 0.048,
532
+ "grad_norm": 146658.0,
533
+ "learning_rate": 1.9175627240143373e-06,
534
+ "loss": 0.7003,
535
+ "step": 750
536
+ },
537
+ {
538
+ "epoch": 0.04864,
539
+ "grad_norm": 112037.1640625,
540
+ "learning_rate": 1.9431643625192015e-06,
541
+ "loss": 0.7051,
542
+ "step": 760
543
+ },
544
+ {
545
+ "epoch": 0.04928,
546
+ "grad_norm": 70628.625,
547
+ "learning_rate": 1.9687660010240654e-06,
548
+ "loss": 0.6923,
549
+ "step": 770
550
+ },
551
+ {
552
+ "epoch": 0.04992,
553
+ "grad_norm": 109922.125,
554
+ "learning_rate": 1.99436763952893e-06,
555
+ "loss": 0.6893,
556
+ "step": 780
557
+ },
558
+ {
559
+ "epoch": 0.05056,
560
+ "grad_norm": 135306.375,
561
+ "learning_rate": 2.0199692780337944e-06,
562
+ "loss": 0.7008,
563
+ "step": 790
564
+ },
565
+ {
566
+ "epoch": 0.0512,
567
+ "grad_norm": 82354.8046875,
568
+ "learning_rate": 2.0455709165386586e-06,
569
+ "loss": 0.705,
570
+ "step": 800
571
+ },
572
+ {
573
+ "epoch": 0.05184,
574
+ "grad_norm": 95951.671875,
575
+ "learning_rate": 2.071172555043523e-06,
576
+ "loss": 0.6912,
577
+ "step": 810
578
+ },
579
+ {
580
+ "epoch": 0.05248,
581
+ "grad_norm": 96797.4609375,
582
+ "learning_rate": 2.096774193548387e-06,
583
+ "loss": 0.6922,
584
+ "step": 820
585
+ },
586
+ {
587
+ "epoch": 0.05312,
588
+ "grad_norm": 87190.625,
589
+ "learning_rate": 2.122375832053252e-06,
590
+ "loss": 0.6946,
591
+ "step": 830
592
+ },
593
+ {
594
+ "epoch": 0.05376,
595
+ "grad_norm": 87958.5625,
596
+ "learning_rate": 2.1479774705581158e-06,
597
+ "loss": 0.6949,
598
+ "step": 840
599
+ },
600
+ {
601
+ "epoch": 0.0544,
602
+ "grad_norm": 77217.1796875,
603
+ "learning_rate": 2.17357910906298e-06,
604
+ "loss": 0.6928,
605
+ "step": 850
606
+ },
607
+ {
608
+ "epoch": 0.05504,
609
+ "grad_norm": 117156.5546875,
610
+ "learning_rate": 2.1991807475678443e-06,
611
+ "loss": 0.692,
612
+ "step": 860
613
+ },
614
+ {
615
+ "epoch": 0.05568,
616
+ "grad_norm": 94618.6875,
617
+ "learning_rate": 2.224782386072709e-06,
618
+ "loss": 0.6976,
619
+ "step": 870
620
+ },
621
+ {
622
+ "epoch": 0.05632,
623
+ "grad_norm": 71444.6484375,
624
+ "learning_rate": 2.2503840245775733e-06,
625
+ "loss": 0.6989,
626
+ "step": 880
627
+ },
628
+ {
629
+ "epoch": 0.05696,
630
+ "grad_norm": 159991.609375,
631
+ "learning_rate": 2.2759856630824376e-06,
632
+ "loss": 0.6928,
633
+ "step": 890
634
+ },
635
+ {
636
+ "epoch": 0.0576,
637
+ "grad_norm": 81899.6875,
638
+ "learning_rate": 2.301587301587302e-06,
639
+ "loss": 0.691,
640
+ "step": 900
641
+ },
642
+ {
643
+ "epoch": 0.05824,
644
+ "grad_norm": 110817.3671875,
645
+ "learning_rate": 2.327188940092166e-06,
646
+ "loss": 0.6858,
647
+ "step": 910
648
+ },
649
+ {
650
+ "epoch": 0.05888,
651
+ "grad_norm": 105698.109375,
652
+ "learning_rate": 2.3527905785970304e-06,
653
+ "loss": 0.6965,
654
+ "step": 920
655
+ },
656
+ {
657
+ "epoch": 0.05952,
658
+ "grad_norm": 76475.0,
659
+ "learning_rate": 2.3783922171018947e-06,
660
+ "loss": 0.6901,
661
+ "step": 930
662
+ },
663
+ {
664
+ "epoch": 0.06016,
665
+ "grad_norm": 96672.6796875,
666
+ "learning_rate": 2.403993855606759e-06,
667
+ "loss": 0.6908,
668
+ "step": 940
669
+ },
670
+ {
671
+ "epoch": 0.0608,
672
+ "grad_norm": 114510.8125,
673
+ "learning_rate": 2.4295954941116232e-06,
674
+ "loss": 0.6904,
675
+ "step": 950
676
+ },
677
+ {
678
+ "epoch": 0.06144,
679
+ "grad_norm": 62412.4375,
680
+ "learning_rate": 2.455197132616488e-06,
681
+ "loss": 0.6855,
682
+ "step": 960
683
+ },
684
+ {
685
+ "epoch": 0.06208,
686
+ "grad_norm": 92860.7109375,
687
+ "learning_rate": 2.4807987711213518e-06,
688
+ "loss": 0.6752,
689
+ "step": 970
690
+ },
691
+ {
692
+ "epoch": 0.06272,
693
+ "grad_norm": 75184.359375,
694
+ "learning_rate": 2.506400409626216e-06,
695
+ "loss": 0.6868,
696
+ "step": 980
697
+ },
698
+ {
699
+ "epoch": 0.06336,
700
+ "grad_norm": 77771.1640625,
701
+ "learning_rate": 2.5320020481310808e-06,
702
+ "loss": 0.6941,
703
+ "step": 990
704
+ },
705
+ {
706
+ "epoch": 0.064,
707
+ "grad_norm": 65366.796875,
708
+ "learning_rate": 2.557603686635945e-06,
709
+ "loss": 0.6808,
710
+ "step": 1000
711
+ },
712
+ {
713
+ "epoch": 0.064,
714
+ "eval_accuracy": 0.75744,
715
+ "eval_loss": 0.6539617776870728,
716
+ "eval_macro_f1": 0.7549859375827388,
717
+ "eval_runtime": 1576.6702,
718
+ "eval_samples_per_second": 63.425,
719
+ "eval_steps_per_second": 0.496,
720
+ "step": 1000
721
+ },
722
+ {
723
+ "epoch": 0.06464,
724
+ "grad_norm": 73310.6171875,
725
+ "learning_rate": 2.583205325140809e-06,
726
+ "loss": 0.6866,
727
+ "step": 1010
728
+ },
729
+ {
730
+ "epoch": 0.06528,
731
+ "grad_norm": 80602.859375,
732
+ "learning_rate": 2.6088069636456736e-06,
733
+ "loss": 0.6873,
734
+ "step": 1020
735
+ },
736
+ {
737
+ "epoch": 0.06592,
738
+ "grad_norm": 121537.0234375,
739
+ "learning_rate": 2.634408602150538e-06,
740
+ "loss": 0.6806,
741
+ "step": 1030
742
+ },
743
+ {
744
+ "epoch": 0.06656,
745
+ "grad_norm": 105537.46875,
746
+ "learning_rate": 2.6600102406554026e-06,
747
+ "loss": 0.6835,
748
+ "step": 1040
749
+ },
750
+ {
751
+ "epoch": 0.0672,
752
+ "grad_norm": 188847.71875,
753
+ "learning_rate": 2.6856118791602664e-06,
754
+ "loss": 0.687,
755
+ "step": 1050
756
+ },
757
+ {
758
+ "epoch": 0.06784,
759
+ "grad_norm": 73677.8359375,
760
+ "learning_rate": 2.7112135176651307e-06,
761
+ "loss": 0.6848,
762
+ "step": 1060
763
+ },
764
+ {
765
+ "epoch": 0.06848,
766
+ "grad_norm": 72158.984375,
767
+ "learning_rate": 2.736815156169995e-06,
768
+ "loss": 0.6833,
769
+ "step": 1070
770
+ },
771
+ {
772
+ "epoch": 0.06912,
773
+ "grad_norm": 73585.1015625,
774
+ "learning_rate": 2.7624167946748593e-06,
775
+ "loss": 0.6766,
776
+ "step": 1080
777
+ },
778
+ {
779
+ "epoch": 0.06976,
780
+ "grad_norm": 78721.0390625,
781
+ "learning_rate": 2.788018433179724e-06,
782
+ "loss": 0.6796,
783
+ "step": 1090
784
+ },
785
+ {
786
+ "epoch": 0.0704,
787
+ "grad_norm": 72044.3515625,
788
+ "learning_rate": 2.813620071684588e-06,
789
+ "loss": 0.673,
790
+ "step": 1100
791
+ },
792
+ {
793
+ "epoch": 0.07104,
794
+ "grad_norm": 100059.984375,
795
+ "learning_rate": 2.8392217101894525e-06,
796
+ "loss": 0.6783,
797
+ "step": 1110
798
+ },
799
+ {
800
+ "epoch": 0.07168,
801
+ "grad_norm": 92175.921875,
802
+ "learning_rate": 2.864823348694317e-06,
803
+ "loss": 0.6901,
804
+ "step": 1120
805
+ },
806
+ {
807
+ "epoch": 0.07232,
808
+ "grad_norm": 86143.453125,
809
+ "learning_rate": 2.8904249871991806e-06,
810
+ "loss": 0.6769,
811
+ "step": 1130
812
+ },
813
+ {
814
+ "epoch": 0.07296,
815
+ "grad_norm": 101410.171875,
816
+ "learning_rate": 2.9160266257040453e-06,
817
+ "loss": 0.6781,
818
+ "step": 1140
819
+ },
820
+ {
821
+ "epoch": 0.0736,
822
+ "grad_norm": 67173.296875,
823
+ "learning_rate": 2.9416282642089096e-06,
824
+ "loss": 0.6737,
825
+ "step": 1150
826
+ },
827
+ {
828
+ "epoch": 0.07424,
829
+ "grad_norm": 100701.8203125,
830
+ "learning_rate": 2.967229902713774e-06,
831
+ "loss": 0.6741,
832
+ "step": 1160
833
+ },
834
+ {
835
+ "epoch": 0.07488,
836
+ "grad_norm": 75457.328125,
837
+ "learning_rate": 2.992831541218638e-06,
838
+ "loss": 0.6742,
839
+ "step": 1170
840
+ },
841
+ {
842
+ "epoch": 0.07552,
843
+ "grad_norm": 97755.9921875,
844
+ "learning_rate": 3.018433179723503e-06,
845
+ "loss": 0.6845,
846
+ "step": 1180
847
+ },
848
+ {
849
+ "epoch": 0.07616,
850
+ "grad_norm": 53426.1171875,
851
+ "learning_rate": 3.0440348182283667e-06,
852
+ "loss": 0.6718,
853
+ "step": 1190
854
+ },
855
+ {
856
+ "epoch": 0.0768,
857
+ "grad_norm": 71654.625,
858
+ "learning_rate": 3.069636456733231e-06,
859
+ "loss": 0.6798,
860
+ "step": 1200
861
+ },
862
+ {
863
+ "epoch": 0.07744,
864
+ "grad_norm": 74562.71875,
865
+ "learning_rate": 3.0952380952380957e-06,
866
+ "loss": 0.6771,
867
+ "step": 1210
868
+ },
869
+ {
870
+ "epoch": 0.07808,
871
+ "grad_norm": 102821.5,
872
+ "learning_rate": 3.1208397337429596e-06,
873
+ "loss": 0.6682,
874
+ "step": 1220
875
+ },
876
+ {
877
+ "epoch": 0.07872,
878
+ "grad_norm": 102060.71875,
879
+ "learning_rate": 3.1464413722478243e-06,
880
+ "loss": 0.6734,
881
+ "step": 1230
882
+ },
883
+ {
884
+ "epoch": 0.07936,
885
+ "grad_norm": 106793.0546875,
886
+ "learning_rate": 3.1720430107526885e-06,
887
+ "loss": 0.6775,
888
+ "step": 1240
889
+ },
890
+ {
891
+ "epoch": 0.08,
892
+ "grad_norm": 118106.40625,
893
+ "learning_rate": 3.1976446492575524e-06,
894
+ "loss": 0.6789,
895
+ "step": 1250
896
+ },
897
+ {
898
+ "epoch": 0.08064,
899
+ "grad_norm": 80626.078125,
900
+ "learning_rate": 3.223246287762417e-06,
901
+ "loss": 0.6675,
902
+ "step": 1260
903
+ },
904
+ {
905
+ "epoch": 0.08128,
906
+ "grad_norm": 78956.4375,
907
+ "learning_rate": 3.2488479262672814e-06,
908
+ "loss": 0.6658,
909
+ "step": 1270
910
+ },
911
+ {
912
+ "epoch": 0.08192,
913
+ "grad_norm": 98567.125,
914
+ "learning_rate": 3.2744495647721457e-06,
915
+ "loss": 0.6726,
916
+ "step": 1280
917
+ },
918
+ {
919
+ "epoch": 0.08256,
920
+ "grad_norm": 84071.5546875,
921
+ "learning_rate": 3.30005120327701e-06,
922
+ "loss": 0.6793,
923
+ "step": 1290
924
+ },
925
+ {
926
+ "epoch": 0.0832,
927
+ "grad_norm": 92090.375,
928
+ "learning_rate": 3.3256528417818746e-06,
929
+ "loss": 0.6758,
930
+ "step": 1300
931
+ },
932
+ {
933
+ "epoch": 0.08384,
934
+ "grad_norm": 82021.3671875,
935
+ "learning_rate": 3.3512544802867385e-06,
936
+ "loss": 0.6731,
937
+ "step": 1310
938
+ },
939
+ {
940
+ "epoch": 0.08448,
941
+ "grad_norm": 156372.765625,
942
+ "learning_rate": 3.3768561187916028e-06,
943
+ "loss": 0.6657,
944
+ "step": 1320
945
+ },
946
+ {
947
+ "epoch": 0.08512,
948
+ "grad_norm": 71925.234375,
949
+ "learning_rate": 3.4024577572964675e-06,
950
+ "loss": 0.6838,
951
+ "step": 1330
952
+ },
953
+ {
954
+ "epoch": 0.08576,
955
+ "grad_norm": 103299.3828125,
956
+ "learning_rate": 3.4280593958013313e-06,
957
+ "loss": 0.663,
958
+ "step": 1340
959
+ },
960
+ {
961
+ "epoch": 0.0864,
962
+ "grad_norm": 71233.90625,
963
+ "learning_rate": 3.453661034306196e-06,
964
+ "loss": 0.6754,
965
+ "step": 1350
966
+ },
967
+ {
968
+ "epoch": 0.08704,
969
+ "grad_norm": 66573.046875,
970
+ "learning_rate": 3.4792626728110603e-06,
971
+ "loss": 0.667,
972
+ "step": 1360
973
+ },
974
+ {
975
+ "epoch": 0.08768,
976
+ "grad_norm": 128433.109375,
977
+ "learning_rate": 3.5048643113159246e-06,
978
+ "loss": 0.6744,
979
+ "step": 1370
980
+ },
981
+ {
982
+ "epoch": 0.08832,
983
+ "grad_norm": 158480.765625,
984
+ "learning_rate": 3.530465949820789e-06,
985
+ "loss": 0.6636,
986
+ "step": 1380
987
+ },
988
+ {
989
+ "epoch": 0.08896,
990
+ "grad_norm": 62473.26953125,
991
+ "learning_rate": 3.5560675883256527e-06,
992
+ "loss": 0.6648,
993
+ "step": 1390
994
+ },
995
+ {
996
+ "epoch": 0.0896,
997
+ "grad_norm": 74170.6953125,
998
+ "learning_rate": 3.5816692268305174e-06,
999
+ "loss": 0.6775,
1000
+ "step": 1400
1001
+ }
1002
+ ],
1003
+ "logging_steps": 10,
1004
+ "max_steps": 78125,
1005
+ "num_input_tokens_seen": 0,
1006
+ "num_train_epochs": 5,
1007
+ "save_steps": 200,
1008
+ "stateful_callbacks": {
1009
+ "EarlyStoppingCallback": {
1010
+ "args": {
1011
+ "early_stopping_patience": 3,
1012
+ "early_stopping_threshold": 0.0
1013
+ },
1014
+ "attributes": {
1015
+ "early_stopping_patience_counter": 0
1016
+ }
1017
+ },
1018
+ "TrainerControl": {
1019
+ "args": {
1020
+ "should_epoch_stop": false,
1021
+ "should_evaluate": false,
1022
+ "should_log": false,
1023
+ "should_save": true,
1024
+ "should_training_stop": false
1025
+ },
1026
+ "attributes": {}
1027
+ }
1028
+ },
1029
+ "total_flos": 1.178305039215552e+16,
1030
+ "train_batch_size": 32,
1031
+ "trial_name": null,
1032
+ "trial_params": null
1033
+ }
graphcodebert-robust/checkpoint-1400/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64c0b8a6d392bd2e7b64d7504ce486a51b83e4079e79341886020b90ee199ffd
3
+ size 5841
graphcodebert-robust/checkpoint-1400/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
graphcodebert-robust/checkpoint-1600/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "RobertaForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.2,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": 2,
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.2,
13
+ "hidden_size": 768,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_norm_eps": 1e-05,
17
+ "max_position_embeddings": 514,
18
+ "model_type": "roberta",
19
+ "num_attention_heads": 12,
20
+ "num_hidden_layers": 12,
21
+ "output_past": true,
22
+ "pad_token_id": 1,
23
+ "position_embedding_type": "absolute",
24
+ "transformers_version": "4.56.0",
25
+ "type_vocab_size": 1,
26
+ "use_cache": true,
27
+ "vocab_size": 50265
28
+ }
graphcodebert-robust/checkpoint-1600/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
graphcodebert-robust/checkpoint-1600/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15639f8eb940de785a10f57f414930caabaeb707d1eef4957c41efa3088bdd21
3
+ size 498612824
graphcodebert-robust/checkpoint-1600/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:049a1b81e3e7aa55a57842ad2677214c0f24624c3d1b8ad90c7309df31ce4611
3
+ size 4741923
graphcodebert-robust/checkpoint-1600/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13732907e73a399dc511f6dc40d1789e18310f4a4b02a554d9781e58f5609487
3
+ size 14581
graphcodebert-robust/checkpoint-1600/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:124625e167eb28acbfc793cfcb3e8a08b32e7fea06501462bc9e420a5e1beb2a
3
+ size 1383
graphcodebert-robust/checkpoint-1600/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00dde6b1c6d4031ecf5cb551ceaa56a4302d6a08ccf15f60beff40f16187a67c
3
+ size 1465
graphcodebert-robust/checkpoint-1600/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
graphcodebert-robust/checkpoint-1600/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
graphcodebert-robust/checkpoint-1600/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50264": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": true,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": false,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "extra_special_tokens": {},
51
+ "mask_token": "<mask>",
52
+ "model_max_length": 512,
53
+ "pad_token": "<pad>",
54
+ "sep_token": "</s>",
55
+ "tokenizer_class": "RobertaTokenizer",
56
+ "trim_offsets": true,
57
+ "unk_token": "<unk>"
58
+ }
graphcodebert-robust/checkpoint-1600/trainer_state.json ADDED
@@ -0,0 +1,1173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1000,
3
+ "best_metric": 0.7549859375827388,
4
+ "best_model_checkpoint": "./output_checkpoints/graphcodebert-robust/checkpoint-1000",
5
+ "epoch": 0.1024,
6
+ "eval_steps": 1000,
7
+ "global_step": 1600,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.00064,
14
+ "grad_norm": 1.6144306659698486,
15
+ "learning_rate": 1.1520000000000002e-08,
16
+ "loss": 0.729,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.00128,
21
+ "grad_norm": 2.0952296257019043,
22
+ "learning_rate": 2.4320000000000002e-08,
23
+ "loss": 0.7295,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.00192,
28
+ "grad_norm": 1.3587689399719238,
29
+ "learning_rate": 3.7120000000000004e-08,
30
+ "loss": 0.73,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.00256,
35
+ "grad_norm": 1.2531732320785522,
36
+ "learning_rate": 4.9920000000000006e-08,
37
+ "loss": 0.7221,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.0032,
42
+ "grad_norm": 1.437932014465332,
43
+ "learning_rate": 6.272000000000001e-08,
44
+ "loss": 0.7209,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.00384,
49
+ "grad_norm": 1.418426752090454,
50
+ "learning_rate": 7.552e-08,
51
+ "loss": 0.729,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.00448,
56
+ "grad_norm": 1.9476298093795776,
57
+ "learning_rate": 8.832e-08,
58
+ "loss": 0.7242,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.00512,
63
+ "grad_norm": 1.7948051691055298,
64
+ "learning_rate": 1.0112000000000001e-07,
65
+ "loss": 0.7227,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 0.00576,
70
+ "grad_norm": 1.6534360647201538,
71
+ "learning_rate": 1.1392e-07,
72
+ "loss": 0.7234,
73
+ "step": 90
74
+ },
75
+ {
76
+ "epoch": 0.0064,
77
+ "grad_norm": 1.0920158624649048,
78
+ "learning_rate": 1.2672e-07,
79
+ "loss": 0.7328,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 0.00704,
84
+ "grad_norm": 1.977837085723877,
85
+ "learning_rate": 1.3952000000000002e-07,
86
+ "loss": 0.7263,
87
+ "step": 110
88
+ },
89
+ {
90
+ "epoch": 0.00768,
91
+ "grad_norm": 1.388983130455017,
92
+ "learning_rate": 1.5232000000000003e-07,
93
+ "loss": 0.7286,
94
+ "step": 120
95
+ },
96
+ {
97
+ "epoch": 0.00832,
98
+ "grad_norm": 1.2956682443618774,
99
+ "learning_rate": 1.6512e-07,
100
+ "loss": 0.7251,
101
+ "step": 130
102
+ },
103
+ {
104
+ "epoch": 0.00896,
105
+ "grad_norm": 1.8125052452087402,
106
+ "learning_rate": 1.7792e-07,
107
+ "loss": 0.7251,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 0.0096,
112
+ "grad_norm": 1.626846194267273,
113
+ "learning_rate": 1.9072e-07,
114
+ "loss": 0.727,
115
+ "step": 150
116
+ },
117
+ {
118
+ "epoch": 0.01024,
119
+ "grad_norm": 2.3243086338043213,
120
+ "learning_rate": 2.0352e-07,
121
+ "loss": 0.726,
122
+ "step": 160
123
+ },
124
+ {
125
+ "epoch": 0.01088,
126
+ "grad_norm": 1.4734737873077393,
127
+ "learning_rate": 2.1632e-07,
128
+ "loss": 0.7252,
129
+ "step": 170
130
+ },
131
+ {
132
+ "epoch": 0.01152,
133
+ "grad_norm": 2.090498685836792,
134
+ "learning_rate": 2.2912e-07,
135
+ "loss": 0.7273,
136
+ "step": 180
137
+ },
138
+ {
139
+ "epoch": 0.01216,
140
+ "grad_norm": 1.7563093900680542,
141
+ "learning_rate": 2.4192000000000004e-07,
142
+ "loss": 0.719,
143
+ "step": 190
144
+ },
145
+ {
146
+ "epoch": 0.0128,
147
+ "grad_norm": 1.449843168258667,
148
+ "learning_rate": 2.5472000000000005e-07,
149
+ "loss": 0.7237,
150
+ "step": 200
151
+ },
152
+ {
153
+ "epoch": 0.01344,
154
+ "grad_norm": 141396.296875,
155
+ "learning_rate": 5.350742447516642e-07,
156
+ "loss": 0.7217,
157
+ "step": 210
158
+ },
159
+ {
160
+ "epoch": 0.01408,
161
+ "grad_norm": 102339.1640625,
162
+ "learning_rate": 5.606758832565284e-07,
163
+ "loss": 0.7215,
164
+ "step": 220
165
+ },
166
+ {
167
+ "epoch": 0.01472,
168
+ "grad_norm": 134052.9375,
169
+ "learning_rate": 5.862775217613928e-07,
170
+ "loss": 0.7115,
171
+ "step": 230
172
+ },
173
+ {
174
+ "epoch": 0.01536,
175
+ "grad_norm": 87181.984375,
176
+ "learning_rate": 6.118791602662571e-07,
177
+ "loss": 0.7241,
178
+ "step": 240
179
+ },
180
+ {
181
+ "epoch": 0.016,
182
+ "grad_norm": 100231.328125,
183
+ "learning_rate": 6.374807987711214e-07,
184
+ "loss": 0.71,
185
+ "step": 250
186
+ },
187
+ {
188
+ "epoch": 0.01664,
189
+ "grad_norm": 136721.484375,
190
+ "learning_rate": 6.630824372759858e-07,
191
+ "loss": 0.7188,
192
+ "step": 260
193
+ },
194
+ {
195
+ "epoch": 0.01728,
196
+ "grad_norm": 115868.8125,
197
+ "learning_rate": 6.8868407578085e-07,
198
+ "loss": 0.7199,
199
+ "step": 270
200
+ },
201
+ {
202
+ "epoch": 0.01792,
203
+ "grad_norm": 70205.1484375,
204
+ "learning_rate": 7.142857142857143e-07,
205
+ "loss": 0.7299,
206
+ "step": 280
207
+ },
208
+ {
209
+ "epoch": 0.01856,
210
+ "grad_norm": 98926.4453125,
211
+ "learning_rate": 7.398873527905787e-07,
212
+ "loss": 0.7159,
213
+ "step": 290
214
+ },
215
+ {
216
+ "epoch": 0.0192,
217
+ "grad_norm": 134108.140625,
218
+ "learning_rate": 7.65488991295443e-07,
219
+ "loss": 0.7122,
220
+ "step": 300
221
+ },
222
+ {
223
+ "epoch": 0.01984,
224
+ "grad_norm": 103719.140625,
225
+ "learning_rate": 7.910906298003073e-07,
226
+ "loss": 0.7185,
227
+ "step": 310
228
+ },
229
+ {
230
+ "epoch": 0.02048,
231
+ "grad_norm": 85624.953125,
232
+ "learning_rate": 8.166922683051716e-07,
233
+ "loss": 0.718,
234
+ "step": 320
235
+ },
236
+ {
237
+ "epoch": 0.02112,
238
+ "grad_norm": 138824.15625,
239
+ "learning_rate": 8.422939068100359e-07,
240
+ "loss": 0.713,
241
+ "step": 330
242
+ },
243
+ {
244
+ "epoch": 0.02176,
245
+ "grad_norm": 73629.0859375,
246
+ "learning_rate": 8.678955453149002e-07,
247
+ "loss": 0.7186,
248
+ "step": 340
249
+ },
250
+ {
251
+ "epoch": 0.0224,
252
+ "grad_norm": 132493.0,
253
+ "learning_rate": 8.934971838197646e-07,
254
+ "loss": 0.7133,
255
+ "step": 350
256
+ },
257
+ {
258
+ "epoch": 0.02304,
259
+ "grad_norm": 85223.625,
260
+ "learning_rate": 9.190988223246289e-07,
261
+ "loss": 0.7124,
262
+ "step": 360
263
+ },
264
+ {
265
+ "epoch": 0.02368,
266
+ "grad_norm": 77868.78125,
267
+ "learning_rate": 9.447004608294931e-07,
268
+ "loss": 0.7058,
269
+ "step": 370
270
+ },
271
+ {
272
+ "epoch": 0.02432,
273
+ "grad_norm": 75874.3046875,
274
+ "learning_rate": 9.703020993343575e-07,
275
+ "loss": 0.7139,
276
+ "step": 380
277
+ },
278
+ {
279
+ "epoch": 0.02496,
280
+ "grad_norm": 151937.703125,
281
+ "learning_rate": 9.959037378392218e-07,
282
+ "loss": 0.713,
283
+ "step": 390
284
+ },
285
+ {
286
+ "epoch": 0.0256,
287
+ "grad_norm": 161711.671875,
288
+ "learning_rate": 1.021505376344086e-06,
289
+ "loss": 0.7137,
290
+ "step": 400
291
+ },
292
+ {
293
+ "epoch": 0.02624,
294
+ "grad_norm": 90800.234375,
295
+ "learning_rate": 1.0471070148489503e-06,
296
+ "loss": 0.7091,
297
+ "step": 410
298
+ },
299
+ {
300
+ "epoch": 0.02688,
301
+ "grad_norm": 82131.34375,
302
+ "learning_rate": 1.0727086533538148e-06,
303
+ "loss": 0.7098,
304
+ "step": 420
305
+ },
306
+ {
307
+ "epoch": 0.02752,
308
+ "grad_norm": 92818.9140625,
309
+ "learning_rate": 1.0983102918586791e-06,
310
+ "loss": 0.7099,
311
+ "step": 430
312
+ },
313
+ {
314
+ "epoch": 0.02816,
315
+ "grad_norm": 88555.5078125,
316
+ "learning_rate": 1.1239119303635434e-06,
317
+ "loss": 0.7086,
318
+ "step": 440
319
+ },
320
+ {
321
+ "epoch": 0.0288,
322
+ "grad_norm": 73428.6015625,
323
+ "learning_rate": 1.1495135688684077e-06,
324
+ "loss": 0.7117,
325
+ "step": 450
326
+ },
327
+ {
328
+ "epoch": 0.02944,
329
+ "grad_norm": 128938.7421875,
330
+ "learning_rate": 1.175115207373272e-06,
331
+ "loss": 0.7182,
332
+ "step": 460
333
+ },
334
+ {
335
+ "epoch": 0.03008,
336
+ "grad_norm": 102742.3359375,
337
+ "learning_rate": 1.2007168458781362e-06,
338
+ "loss": 0.7108,
339
+ "step": 470
340
+ },
341
+ {
342
+ "epoch": 0.03072,
343
+ "grad_norm": 73825.8125,
344
+ "learning_rate": 1.2263184843830007e-06,
345
+ "loss": 0.7087,
346
+ "step": 480
347
+ },
348
+ {
349
+ "epoch": 0.03136,
350
+ "grad_norm": 110930.75,
351
+ "learning_rate": 1.251920122887865e-06,
352
+ "loss": 0.7232,
353
+ "step": 490
354
+ },
355
+ {
356
+ "epoch": 0.032,
357
+ "grad_norm": 95068.84375,
358
+ "learning_rate": 1.2775217613927293e-06,
359
+ "loss": 0.703,
360
+ "step": 500
361
+ },
362
+ {
363
+ "epoch": 0.03264,
364
+ "grad_norm": 118731.9296875,
365
+ "learning_rate": 1.3031233998975938e-06,
366
+ "loss": 0.7063,
367
+ "step": 510
368
+ },
369
+ {
370
+ "epoch": 0.03328,
371
+ "grad_norm": 80511.828125,
372
+ "learning_rate": 1.3287250384024578e-06,
373
+ "loss": 0.7143,
374
+ "step": 520
375
+ },
376
+ {
377
+ "epoch": 0.03392,
378
+ "grad_norm": 84864.484375,
379
+ "learning_rate": 1.354326676907322e-06,
380
+ "loss": 0.7055,
381
+ "step": 530
382
+ },
383
+ {
384
+ "epoch": 0.03456,
385
+ "grad_norm": 107800.109375,
386
+ "learning_rate": 1.3799283154121864e-06,
387
+ "loss": 0.7119,
388
+ "step": 540
389
+ },
390
+ {
391
+ "epoch": 0.0352,
392
+ "grad_norm": 83667.671875,
393
+ "learning_rate": 1.4055299539170509e-06,
394
+ "loss": 0.7082,
395
+ "step": 550
396
+ },
397
+ {
398
+ "epoch": 0.03584,
399
+ "grad_norm": 75656.4140625,
400
+ "learning_rate": 1.4311315924219151e-06,
401
+ "loss": 0.7062,
402
+ "step": 560
403
+ },
404
+ {
405
+ "epoch": 0.03648,
406
+ "grad_norm": 79985.875,
407
+ "learning_rate": 1.4567332309267796e-06,
408
+ "loss": 0.7155,
409
+ "step": 570
410
+ },
411
+ {
412
+ "epoch": 0.03712,
413
+ "grad_norm": 76334.078125,
414
+ "learning_rate": 1.4823348694316437e-06,
415
+ "loss": 0.7075,
416
+ "step": 580
417
+ },
418
+ {
419
+ "epoch": 0.03776,
420
+ "grad_norm": 140764.03125,
421
+ "learning_rate": 1.507936507936508e-06,
422
+ "loss": 0.7065,
423
+ "step": 590
424
+ },
425
+ {
426
+ "epoch": 0.0384,
427
+ "grad_norm": 100877.296875,
428
+ "learning_rate": 1.5335381464413722e-06,
429
+ "loss": 0.7096,
430
+ "step": 600
431
+ },
432
+ {
433
+ "epoch": 0.03904,
434
+ "grad_norm": 104088.1171875,
435
+ "learning_rate": 1.5591397849462367e-06,
436
+ "loss": 0.6987,
437
+ "step": 610
438
+ },
439
+ {
440
+ "epoch": 0.03968,
441
+ "grad_norm": 80806.2265625,
442
+ "learning_rate": 1.584741423451101e-06,
443
+ "loss": 0.707,
444
+ "step": 620
445
+ },
446
+ {
447
+ "epoch": 0.04032,
448
+ "grad_norm": 109884.765625,
449
+ "learning_rate": 1.6103430619559655e-06,
450
+ "loss": 0.6991,
451
+ "step": 630
452
+ },
453
+ {
454
+ "epoch": 0.04096,
455
+ "grad_norm": 79944.890625,
456
+ "learning_rate": 1.6359447004608298e-06,
457
+ "loss": 0.7047,
458
+ "step": 640
459
+ },
460
+ {
461
+ "epoch": 0.0416,
462
+ "grad_norm": 93673.3828125,
463
+ "learning_rate": 1.6615463389656938e-06,
464
+ "loss": 0.6971,
465
+ "step": 650
466
+ },
467
+ {
468
+ "epoch": 0.04224,
469
+ "grad_norm": 76641.265625,
470
+ "learning_rate": 1.6871479774705581e-06,
471
+ "loss": 0.6957,
472
+ "step": 660
473
+ },
474
+ {
475
+ "epoch": 0.04288,
476
+ "grad_norm": 73583.5546875,
477
+ "learning_rate": 1.7127496159754226e-06,
478
+ "loss": 0.7028,
479
+ "step": 670
480
+ },
481
+ {
482
+ "epoch": 0.04352,
483
+ "grad_norm": 75177.9609375,
484
+ "learning_rate": 1.7383512544802869e-06,
485
+ "loss": 0.7012,
486
+ "step": 680
487
+ },
488
+ {
489
+ "epoch": 0.04416,
490
+ "grad_norm": 78340.8515625,
491
+ "learning_rate": 1.7639528929851512e-06,
492
+ "loss": 0.6987,
493
+ "step": 690
494
+ },
495
+ {
496
+ "epoch": 0.0448,
497
+ "grad_norm": 86004.1171875,
498
+ "learning_rate": 1.7895545314900157e-06,
499
+ "loss": 0.7061,
500
+ "step": 700
501
+ },
502
+ {
503
+ "epoch": 0.04544,
504
+ "grad_norm": 94212.0390625,
505
+ "learning_rate": 1.8151561699948797e-06,
506
+ "loss": 0.6993,
507
+ "step": 710
508
+ },
509
+ {
510
+ "epoch": 0.04608,
511
+ "grad_norm": 83918.2421875,
512
+ "learning_rate": 1.840757808499744e-06,
513
+ "loss": 0.7009,
514
+ "step": 720
515
+ },
516
+ {
517
+ "epoch": 0.04672,
518
+ "grad_norm": 68374.3125,
519
+ "learning_rate": 1.8663594470046085e-06,
520
+ "loss": 0.6964,
521
+ "step": 730
522
+ },
523
+ {
524
+ "epoch": 0.04736,
525
+ "grad_norm": 90348.78125,
526
+ "learning_rate": 1.8919610855094728e-06,
527
+ "loss": 0.7011,
528
+ "step": 740
529
+ },
530
+ {
531
+ "epoch": 0.048,
532
+ "grad_norm": 146658.0,
533
+ "learning_rate": 1.9175627240143373e-06,
534
+ "loss": 0.7003,
535
+ "step": 750
536
+ },
537
+ {
538
+ "epoch": 0.04864,
539
+ "grad_norm": 112037.1640625,
540
+ "learning_rate": 1.9431643625192015e-06,
541
+ "loss": 0.7051,
542
+ "step": 760
543
+ },
544
+ {
545
+ "epoch": 0.04928,
546
+ "grad_norm": 70628.625,
547
+ "learning_rate": 1.9687660010240654e-06,
548
+ "loss": 0.6923,
549
+ "step": 770
550
+ },
551
+ {
552
+ "epoch": 0.04992,
553
+ "grad_norm": 109922.125,
554
+ "learning_rate": 1.99436763952893e-06,
555
+ "loss": 0.6893,
556
+ "step": 780
557
+ },
558
+ {
559
+ "epoch": 0.05056,
560
+ "grad_norm": 135306.375,
561
+ "learning_rate": 2.0199692780337944e-06,
562
+ "loss": 0.7008,
563
+ "step": 790
564
+ },
565
+ {
566
+ "epoch": 0.0512,
567
+ "grad_norm": 82354.8046875,
568
+ "learning_rate": 2.0455709165386586e-06,
569
+ "loss": 0.705,
570
+ "step": 800
571
+ },
572
+ {
573
+ "epoch": 0.05184,
574
+ "grad_norm": 95951.671875,
575
+ "learning_rate": 2.071172555043523e-06,
576
+ "loss": 0.6912,
577
+ "step": 810
578
+ },
579
+ {
580
+ "epoch": 0.05248,
581
+ "grad_norm": 96797.4609375,
582
+ "learning_rate": 2.096774193548387e-06,
583
+ "loss": 0.6922,
584
+ "step": 820
585
+ },
586
+ {
587
+ "epoch": 0.05312,
588
+ "grad_norm": 87190.625,
589
+ "learning_rate": 2.122375832053252e-06,
590
+ "loss": 0.6946,
591
+ "step": 830
592
+ },
593
+ {
594
+ "epoch": 0.05376,
595
+ "grad_norm": 87958.5625,
596
+ "learning_rate": 2.1479774705581158e-06,
597
+ "loss": 0.6949,
598
+ "step": 840
599
+ },
600
+ {
601
+ "epoch": 0.0544,
602
+ "grad_norm": 77217.1796875,
603
+ "learning_rate": 2.17357910906298e-06,
604
+ "loss": 0.6928,
605
+ "step": 850
606
+ },
607
+ {
608
+ "epoch": 0.05504,
609
+ "grad_norm": 117156.5546875,
610
+ "learning_rate": 2.1991807475678443e-06,
611
+ "loss": 0.692,
612
+ "step": 860
613
+ },
614
+ {
615
+ "epoch": 0.05568,
616
+ "grad_norm": 94618.6875,
617
+ "learning_rate": 2.224782386072709e-06,
618
+ "loss": 0.6976,
619
+ "step": 870
620
+ },
621
+ {
622
+ "epoch": 0.05632,
623
+ "grad_norm": 71444.6484375,
624
+ "learning_rate": 2.2503840245775733e-06,
625
+ "loss": 0.6989,
626
+ "step": 880
627
+ },
628
+ {
629
+ "epoch": 0.05696,
630
+ "grad_norm": 159991.609375,
631
+ "learning_rate": 2.2759856630824376e-06,
632
+ "loss": 0.6928,
633
+ "step": 890
634
+ },
635
+ {
636
+ "epoch": 0.0576,
637
+ "grad_norm": 81899.6875,
638
+ "learning_rate": 2.301587301587302e-06,
639
+ "loss": 0.691,
640
+ "step": 900
641
+ },
642
+ {
643
+ "epoch": 0.05824,
644
+ "grad_norm": 110817.3671875,
645
+ "learning_rate": 2.327188940092166e-06,
646
+ "loss": 0.6858,
647
+ "step": 910
648
+ },
649
+ {
650
+ "epoch": 0.05888,
651
+ "grad_norm": 105698.109375,
652
+ "learning_rate": 2.3527905785970304e-06,
653
+ "loss": 0.6965,
654
+ "step": 920
655
+ },
656
+ {
657
+ "epoch": 0.05952,
658
+ "grad_norm": 76475.0,
659
+ "learning_rate": 2.3783922171018947e-06,
660
+ "loss": 0.6901,
661
+ "step": 930
662
+ },
663
+ {
664
+ "epoch": 0.06016,
665
+ "grad_norm": 96672.6796875,
666
+ "learning_rate": 2.403993855606759e-06,
667
+ "loss": 0.6908,
668
+ "step": 940
669
+ },
670
+ {
671
+ "epoch": 0.0608,
672
+ "grad_norm": 114510.8125,
673
+ "learning_rate": 2.4295954941116232e-06,
674
+ "loss": 0.6904,
675
+ "step": 950
676
+ },
677
+ {
678
+ "epoch": 0.06144,
679
+ "grad_norm": 62412.4375,
680
+ "learning_rate": 2.455197132616488e-06,
681
+ "loss": 0.6855,
682
+ "step": 960
683
+ },
684
+ {
685
+ "epoch": 0.06208,
686
+ "grad_norm": 92860.7109375,
687
+ "learning_rate": 2.4807987711213518e-06,
688
+ "loss": 0.6752,
689
+ "step": 970
690
+ },
691
+ {
692
+ "epoch": 0.06272,
693
+ "grad_norm": 75184.359375,
694
+ "learning_rate": 2.506400409626216e-06,
695
+ "loss": 0.6868,
696
+ "step": 980
697
+ },
698
+ {
699
+ "epoch": 0.06336,
700
+ "grad_norm": 77771.1640625,
701
+ "learning_rate": 2.5320020481310808e-06,
702
+ "loss": 0.6941,
703
+ "step": 990
704
+ },
705
+ {
706
+ "epoch": 0.064,
707
+ "grad_norm": 65366.796875,
708
+ "learning_rate": 2.557603686635945e-06,
709
+ "loss": 0.6808,
710
+ "step": 1000
711
+ },
712
+ {
713
+ "epoch": 0.064,
714
+ "eval_accuracy": 0.75744,
715
+ "eval_loss": 0.6539617776870728,
716
+ "eval_macro_f1": 0.7549859375827388,
717
+ "eval_runtime": 1576.6702,
718
+ "eval_samples_per_second": 63.425,
719
+ "eval_steps_per_second": 0.496,
720
+ "step": 1000
721
+ },
722
+ {
723
+ "epoch": 0.06464,
724
+ "grad_norm": 73310.6171875,
725
+ "learning_rate": 2.583205325140809e-06,
726
+ "loss": 0.6866,
727
+ "step": 1010
728
+ },
729
+ {
730
+ "epoch": 0.06528,
731
+ "grad_norm": 80602.859375,
732
+ "learning_rate": 2.6088069636456736e-06,
733
+ "loss": 0.6873,
734
+ "step": 1020
735
+ },
736
+ {
737
+ "epoch": 0.06592,
738
+ "grad_norm": 121537.0234375,
739
+ "learning_rate": 2.634408602150538e-06,
740
+ "loss": 0.6806,
741
+ "step": 1030
742
+ },
743
+ {
744
+ "epoch": 0.06656,
745
+ "grad_norm": 105537.46875,
746
+ "learning_rate": 2.6600102406554026e-06,
747
+ "loss": 0.6835,
748
+ "step": 1040
749
+ },
750
+ {
751
+ "epoch": 0.0672,
752
+ "grad_norm": 188847.71875,
753
+ "learning_rate": 2.6856118791602664e-06,
754
+ "loss": 0.687,
755
+ "step": 1050
756
+ },
757
+ {
758
+ "epoch": 0.06784,
759
+ "grad_norm": 73677.8359375,
760
+ "learning_rate": 2.7112135176651307e-06,
761
+ "loss": 0.6848,
762
+ "step": 1060
763
+ },
764
+ {
765
+ "epoch": 0.06848,
766
+ "grad_norm": 72158.984375,
767
+ "learning_rate": 2.736815156169995e-06,
768
+ "loss": 0.6833,
769
+ "step": 1070
770
+ },
771
+ {
772
+ "epoch": 0.06912,
773
+ "grad_norm": 73585.1015625,
774
+ "learning_rate": 2.7624167946748593e-06,
775
+ "loss": 0.6766,
776
+ "step": 1080
777
+ },
778
+ {
779
+ "epoch": 0.06976,
780
+ "grad_norm": 78721.0390625,
781
+ "learning_rate": 2.788018433179724e-06,
782
+ "loss": 0.6796,
783
+ "step": 1090
784
+ },
785
+ {
786
+ "epoch": 0.0704,
787
+ "grad_norm": 72044.3515625,
788
+ "learning_rate": 2.813620071684588e-06,
789
+ "loss": 0.673,
790
+ "step": 1100
791
+ },
792
+ {
793
+ "epoch": 0.07104,
794
+ "grad_norm": 100059.984375,
795
+ "learning_rate": 2.8392217101894525e-06,
796
+ "loss": 0.6783,
797
+ "step": 1110
798
+ },
799
+ {
800
+ "epoch": 0.07168,
801
+ "grad_norm": 92175.921875,
802
+ "learning_rate": 2.864823348694317e-06,
803
+ "loss": 0.6901,
804
+ "step": 1120
805
+ },
806
+ {
807
+ "epoch": 0.07232,
808
+ "grad_norm": 86143.453125,
809
+ "learning_rate": 2.8904249871991806e-06,
810
+ "loss": 0.6769,
811
+ "step": 1130
812
+ },
813
+ {
814
+ "epoch": 0.07296,
815
+ "grad_norm": 101410.171875,
816
+ "learning_rate": 2.9160266257040453e-06,
817
+ "loss": 0.6781,
818
+ "step": 1140
819
+ },
820
+ {
821
+ "epoch": 0.0736,
822
+ "grad_norm": 67173.296875,
823
+ "learning_rate": 2.9416282642089096e-06,
824
+ "loss": 0.6737,
825
+ "step": 1150
826
+ },
827
+ {
828
+ "epoch": 0.07424,
829
+ "grad_norm": 100701.8203125,
830
+ "learning_rate": 2.967229902713774e-06,
831
+ "loss": 0.6741,
832
+ "step": 1160
833
+ },
834
+ {
835
+ "epoch": 0.07488,
836
+ "grad_norm": 75457.328125,
837
+ "learning_rate": 2.992831541218638e-06,
838
+ "loss": 0.6742,
839
+ "step": 1170
840
+ },
841
+ {
842
+ "epoch": 0.07552,
843
+ "grad_norm": 97755.9921875,
844
+ "learning_rate": 3.018433179723503e-06,
845
+ "loss": 0.6845,
846
+ "step": 1180
847
+ },
848
+ {
849
+ "epoch": 0.07616,
850
+ "grad_norm": 53426.1171875,
851
+ "learning_rate": 3.0440348182283667e-06,
852
+ "loss": 0.6718,
853
+ "step": 1190
854
+ },
855
+ {
856
+ "epoch": 0.0768,
857
+ "grad_norm": 71654.625,
858
+ "learning_rate": 3.069636456733231e-06,
859
+ "loss": 0.6798,
860
+ "step": 1200
861
+ },
862
+ {
863
+ "epoch": 0.07744,
864
+ "grad_norm": 74562.71875,
865
+ "learning_rate": 3.0952380952380957e-06,
866
+ "loss": 0.6771,
867
+ "step": 1210
868
+ },
869
+ {
870
+ "epoch": 0.07808,
871
+ "grad_norm": 102821.5,
872
+ "learning_rate": 3.1208397337429596e-06,
873
+ "loss": 0.6682,
874
+ "step": 1220
875
+ },
876
+ {
877
+ "epoch": 0.07872,
878
+ "grad_norm": 102060.71875,
879
+ "learning_rate": 3.1464413722478243e-06,
880
+ "loss": 0.6734,
881
+ "step": 1230
882
+ },
883
+ {
884
+ "epoch": 0.07936,
885
+ "grad_norm": 106793.0546875,
886
+ "learning_rate": 3.1720430107526885e-06,
887
+ "loss": 0.6775,
888
+ "step": 1240
889
+ },
890
+ {
891
+ "epoch": 0.08,
892
+ "grad_norm": 118106.40625,
893
+ "learning_rate": 3.1976446492575524e-06,
894
+ "loss": 0.6789,
895
+ "step": 1250
896
+ },
897
+ {
898
+ "epoch": 0.08064,
899
+ "grad_norm": 80626.078125,
900
+ "learning_rate": 3.223246287762417e-06,
901
+ "loss": 0.6675,
902
+ "step": 1260
903
+ },
904
+ {
905
+ "epoch": 0.08128,
906
+ "grad_norm": 78956.4375,
907
+ "learning_rate": 3.2488479262672814e-06,
908
+ "loss": 0.6658,
909
+ "step": 1270
910
+ },
911
+ {
912
+ "epoch": 0.08192,
913
+ "grad_norm": 98567.125,
914
+ "learning_rate": 3.2744495647721457e-06,
915
+ "loss": 0.6726,
916
+ "step": 1280
917
+ },
918
+ {
919
+ "epoch": 0.08256,
920
+ "grad_norm": 84071.5546875,
921
+ "learning_rate": 3.30005120327701e-06,
922
+ "loss": 0.6793,
923
+ "step": 1290
924
+ },
925
+ {
926
+ "epoch": 0.0832,
927
+ "grad_norm": 92090.375,
928
+ "learning_rate": 3.3256528417818746e-06,
929
+ "loss": 0.6758,
930
+ "step": 1300
931
+ },
932
+ {
933
+ "epoch": 0.08384,
934
+ "grad_norm": 82021.3671875,
935
+ "learning_rate": 3.3512544802867385e-06,
936
+ "loss": 0.6731,
937
+ "step": 1310
938
+ },
939
+ {
940
+ "epoch": 0.08448,
941
+ "grad_norm": 156372.765625,
942
+ "learning_rate": 3.3768561187916028e-06,
943
+ "loss": 0.6657,
944
+ "step": 1320
945
+ },
946
+ {
947
+ "epoch": 0.08512,
948
+ "grad_norm": 71925.234375,
949
+ "learning_rate": 3.4024577572964675e-06,
950
+ "loss": 0.6838,
951
+ "step": 1330
952
+ },
953
+ {
954
+ "epoch": 0.08576,
955
+ "grad_norm": 103299.3828125,
956
+ "learning_rate": 3.4280593958013313e-06,
957
+ "loss": 0.663,
958
+ "step": 1340
959
+ },
960
+ {
961
+ "epoch": 0.0864,
962
+ "grad_norm": 71233.90625,
963
+ "learning_rate": 3.453661034306196e-06,
964
+ "loss": 0.6754,
965
+ "step": 1350
966
+ },
967
+ {
968
+ "epoch": 0.08704,
969
+ "grad_norm": 66573.046875,
970
+ "learning_rate": 3.4792626728110603e-06,
971
+ "loss": 0.667,
972
+ "step": 1360
973
+ },
974
+ {
975
+ "epoch": 0.08768,
976
+ "grad_norm": 128433.109375,
977
+ "learning_rate": 3.5048643113159246e-06,
978
+ "loss": 0.6744,
979
+ "step": 1370
980
+ },
981
+ {
982
+ "epoch": 0.08832,
983
+ "grad_norm": 158480.765625,
984
+ "learning_rate": 3.530465949820789e-06,
985
+ "loss": 0.6636,
986
+ "step": 1380
987
+ },
988
+ {
989
+ "epoch": 0.08896,
990
+ "grad_norm": 62473.26953125,
991
+ "learning_rate": 3.5560675883256527e-06,
992
+ "loss": 0.6648,
993
+ "step": 1390
994
+ },
995
+ {
996
+ "epoch": 0.0896,
997
+ "grad_norm": 74170.6953125,
998
+ "learning_rate": 3.5816692268305174e-06,
999
+ "loss": 0.6775,
1000
+ "step": 1400
1001
+ },
1002
+ {
1003
+ "epoch": 0.09024,
1004
+ "grad_norm": 138458.296875,
1005
+ "learning_rate": 3.6072708653353817e-06,
1006
+ "loss": 0.6712,
1007
+ "step": 1410
1008
+ },
1009
+ {
1010
+ "epoch": 0.09088,
1011
+ "grad_norm": 90254.9921875,
1012
+ "learning_rate": 3.6328725038402464e-06,
1013
+ "loss": 0.6798,
1014
+ "step": 1420
1015
+ },
1016
+ {
1017
+ "epoch": 0.09152,
1018
+ "grad_norm": 68962.5390625,
1019
+ "learning_rate": 3.6584741423451102e-06,
1020
+ "loss": 0.6671,
1021
+ "step": 1430
1022
+ },
1023
+ {
1024
+ "epoch": 0.09216,
1025
+ "grad_norm": 96779.4609375,
1026
+ "learning_rate": 3.684075780849975e-06,
1027
+ "loss": 0.6521,
1028
+ "step": 1440
1029
+ },
1030
+ {
1031
+ "epoch": 0.0928,
1032
+ "grad_norm": 105383.8203125,
1033
+ "learning_rate": 3.7096774193548392e-06,
1034
+ "loss": 0.6648,
1035
+ "step": 1450
1036
+ },
1037
+ {
1038
+ "epoch": 0.09344,
1039
+ "grad_norm": 78728.4609375,
1040
+ "learning_rate": 3.735279057859703e-06,
1041
+ "loss": 0.6637,
1042
+ "step": 1460
1043
+ },
1044
+ {
1045
+ "epoch": 0.09408,
1046
+ "grad_norm": 121998.46875,
1047
+ "learning_rate": 3.7608806963645678e-06,
1048
+ "loss": 0.6642,
1049
+ "step": 1470
1050
+ },
1051
+ {
1052
+ "epoch": 0.09472,
1053
+ "grad_norm": 87487.171875,
1054
+ "learning_rate": 3.786482334869432e-06,
1055
+ "loss": 0.6584,
1056
+ "step": 1480
1057
+ },
1058
+ {
1059
+ "epoch": 0.09536,
1060
+ "grad_norm": 81816.6640625,
1061
+ "learning_rate": 3.8120839733742963e-06,
1062
+ "loss": 0.6699,
1063
+ "step": 1490
1064
+ },
1065
+ {
1066
+ "epoch": 0.096,
1067
+ "grad_norm": 63027.9453125,
1068
+ "learning_rate": 3.83768561187916e-06,
1069
+ "loss": 0.6642,
1070
+ "step": 1500
1071
+ },
1072
+ {
1073
+ "epoch": 0.09664,
1074
+ "grad_norm": 69659.5,
1075
+ "learning_rate": 3.8632872503840245e-06,
1076
+ "loss": 0.6671,
1077
+ "step": 1510
1078
+ },
1079
+ {
1080
+ "epoch": 0.09728,
1081
+ "grad_norm": 136521.953125,
1082
+ "learning_rate": 3.88888888888889e-06,
1083
+ "loss": 0.6578,
1084
+ "step": 1520
1085
+ },
1086
+ {
1087
+ "epoch": 0.09792,
1088
+ "grad_norm": 75749.6640625,
1089
+ "learning_rate": 3.914490527393753e-06,
1090
+ "loss": 0.6654,
1091
+ "step": 1530
1092
+ },
1093
+ {
1094
+ "epoch": 0.09856,
1095
+ "grad_norm": 69284.6640625,
1096
+ "learning_rate": 3.940092165898618e-06,
1097
+ "loss": 0.6734,
1098
+ "step": 1540
1099
+ },
1100
+ {
1101
+ "epoch": 0.0992,
1102
+ "grad_norm": 78889.4921875,
1103
+ "learning_rate": 3.965693804403482e-06,
1104
+ "loss": 0.663,
1105
+ "step": 1550
1106
+ },
1107
+ {
1108
+ "epoch": 0.09984,
1109
+ "grad_norm": 65618.0546875,
1110
+ "learning_rate": 3.991295442908347e-06,
1111
+ "loss": 0.6606,
1112
+ "step": 1560
1113
+ },
1114
+ {
1115
+ "epoch": 0.10048,
1116
+ "grad_norm": 138387.71875,
1117
+ "learning_rate": 4.016897081413211e-06,
1118
+ "loss": 0.6609,
1119
+ "step": 1570
1120
+ },
1121
+ {
1122
+ "epoch": 0.10112,
1123
+ "grad_norm": 92268.203125,
1124
+ "learning_rate": 4.042498719918075e-06,
1125
+ "loss": 0.6649,
1126
+ "step": 1580
1127
+ },
1128
+ {
1129
+ "epoch": 0.10176,
1130
+ "grad_norm": 93633.984375,
1131
+ "learning_rate": 4.0681003584229395e-06,
1132
+ "loss": 0.6726,
1133
+ "step": 1590
1134
+ },
1135
+ {
1136
+ "epoch": 0.1024,
1137
+ "grad_norm": 70783.703125,
1138
+ "learning_rate": 4.093701996927804e-06,
1139
+ "loss": 0.6551,
1140
+ "step": 1600
1141
+ }
1142
+ ],
1143
+ "logging_steps": 10,
1144
+ "max_steps": 78125,
1145
+ "num_input_tokens_seen": 0,
1146
+ "num_train_epochs": 5,
1147
+ "save_steps": 200,
1148
+ "stateful_callbacks": {
1149
+ "EarlyStoppingCallback": {
1150
+ "args": {
1151
+ "early_stopping_patience": 3,
1152
+ "early_stopping_threshold": 0.0
1153
+ },
1154
+ "attributes": {
1155
+ "early_stopping_patience_counter": 0
1156
+ }
1157
+ },
1158
+ "TrainerControl": {
1159
+ "args": {
1160
+ "should_epoch_stop": false,
1161
+ "should_evaluate": false,
1162
+ "should_log": false,
1163
+ "should_save": true,
1164
+ "should_training_stop": false
1165
+ },
1166
+ "attributes": {}
1167
+ }
1168
+ },
1169
+ "total_flos": 1.346696114645952e+16,
1170
+ "train_batch_size": 32,
1171
+ "trial_name": null,
1172
+ "trial_params": null
1173
+ }
graphcodebert-robust/checkpoint-1600/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64c0b8a6d392bd2e7b64d7504ce486a51b83e4079e79341886020b90ee199ffd
3
+ size 5841
graphcodebert-robust/checkpoint-1600/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
graphcodebert-robust/training.log CHANGED
@@ -1,10 +1,10 @@
1
- 2026-04-16 09:18:35,093 - INFO - train_pipeline - Logging to ./output_checkpoints/graphcodebert-robust/training.log
2
- 2026-04-16 09:18:35,094 - INFO - train_pipeline - Training config: TrainConfig(model_name='microsoft/graphcodebert-base', output_dir='./output_checkpoints/graphcodebert-robust', num_epochs=5, batch_size=32, learning_rate=2e-05, max_length=512, num_labels=2, use_wandb=True, freeze_base=True, loss_type='r-drop', focal_alpha=1.0, focal_gamma=2.0, r_drop_alpha=4.0, infonce_temperature=0.07, infonce_weight=0.5, seed=42, resume_from_checkpoint='checkpoints/graphcodebert-robust/checkpoint-200', label_smoothing=0.1, adversarial_epsilon=0.5, use_swa=True, swa_start_epoch=2, swa_lr=1e-05, data_augmentation=True, aug_rename_prob=0.3, aug_format_prob=0.3, device=device(type='cuda'))
3
- 2026-04-16 09:18:35,094 - INFO - train_pipeline - Loading model & tokenizer for 'microsoft/graphcodebert-base'
4
- 2026-04-16 09:18:43,368 - INFO - train_pipeline - Model placed on cuda
5
- 2026-04-16 09:18:43,371 - INFO - train_pipeline - Base model weights frozen – only classifier head will be trained.
6
- 2026-04-16 09:18:43,372 - INFO - train_pipeline - ===== Model Architecture =====
7
- 2026-04-16 09:18:43,375 - INFO - train_pipeline -
8
  RobertaForSequenceClassification(
9
  (roberta): RobertaModel(
10
  (embeddings): RobertaEmbeddings(
@@ -49,12 +49,12 @@ RobertaForSequenceClassification(
49
  (out_proj): Linear(in_features=768, out_features=2, bias=True)
50
  )
51
  )
52
- 2026-04-16 09:18:43,377 - INFO - train_pipeline - ===== Parameter Summary =====
53
- 2026-04-16 09:18:43,378 - INFO - train_pipeline - Total Parameters: 124,647,170
54
- 2026-04-16 09:18:43,380 - INFO - train_pipeline - Trainable Parameters: 592,130
55
- 2026-04-16 09:18:43,381 - INFO - train_pipeline - Non-trainable Parameters: 124,055,040
56
- 2026-04-16 09:18:43,381 - INFO - train_pipeline - ===== Tokenizer Summary =====
57
- 2026-04-16 09:18:43,409 - INFO - train_pipeline - Vocab size: 50265 | Special tokens: ['<s>', '</s>', '<unk>', '<pad>', '<mask>']
58
- 2026-04-16 09:18:43,410 - INFO - train_pipeline - ===== End of Architecture Log =====
59
- 2026-04-16 09:18:43,411 - INFO - train_pipeline - Data augmentation enabled (rename=0.3, format=0.3)
60
- 2026-04-16 09:22:04,475 - INFO - train_pipeline - === Starting training with robust regularisation ===
 
1
+ 2026-04-16 10:18:35,455 - INFO - train_pipeline - Logging to ./output_checkpoints/graphcodebert-robust/training.log
2
+ 2026-04-16 10:18:35,457 - INFO - train_pipeline - Training config: TrainConfig(model_name='microsoft/graphcodebert-base', output_dir='./output_checkpoints/graphcodebert-robust', num_epochs=5, batch_size=32, learning_rate=2e-05, max_length=512, num_labels=2, use_wandb=True, freeze_base=True, loss_type='r-drop', focal_alpha=1.0, focal_gamma=2.0, r_drop_alpha=4.0, infonce_temperature=0.07, infonce_weight=0.5, seed=42, resume_from_checkpoint='output_checkpoints/graphcodebert-robust/checkpoint-1000', label_smoothing=0.1, adversarial_epsilon=0.5, use_swa=True, swa_start_epoch=2, swa_lr=1e-05, data_augmentation=True, aug_rename_prob=0.3, aug_format_prob=0.3, device=device(type='cuda'))
3
+ 2026-04-16 10:18:35,458 - INFO - train_pipeline - Loading model & tokenizer for 'microsoft/graphcodebert-base'
4
+ 2026-04-16 10:18:36,698 - INFO - train_pipeline - Model placed on cuda
5
+ 2026-04-16 10:18:36,701 - INFO - train_pipeline - Base model weights frozen – only classifier head will be trained.
6
+ 2026-04-16 10:18:36,702 - INFO - train_pipeline - ===== Model Architecture =====
7
+ 2026-04-16 10:18:36,705 - INFO - train_pipeline -
8
  RobertaForSequenceClassification(
9
  (roberta): RobertaModel(
10
  (embeddings): RobertaEmbeddings(
 
49
  (out_proj): Linear(in_features=768, out_features=2, bias=True)
50
  )
51
  )
52
+ 2026-04-16 10:18:36,707 - INFO - train_pipeline - ===== Parameter Summary =====
53
+ 2026-04-16 10:18:36,709 - INFO - train_pipeline - Total Parameters: 124,647,170
54
+ 2026-04-16 10:18:36,711 - INFO - train_pipeline - Trainable Parameters: 592,130
55
+ 2026-04-16 10:18:36,712 - INFO - train_pipeline - Non-trainable Parameters: 124,055,040
56
+ 2026-04-16 10:18:36,713 - INFO - train_pipeline - ===== Tokenizer Summary =====
57
+ 2026-04-16 10:18:36,732 - INFO - train_pipeline - Vocab size: 50265 | Special tokens: ['<s>', '</s>', '<unk>', '<pad>', '<mask>']
58
+ 2026-04-16 10:18:36,734 - INFO - train_pipeline - ===== End of Architecture Log =====
59
+ 2026-04-16 10:18:36,735 - INFO - train_pipeline - Data augmentation enabled (rename=0.3, format=0.3)
60
+ 2026-04-16 10:18:38,005 - INFO - train_pipeline - === Starting training with robust regularisation ===