dzungpham committed on 1 day ago

Commit

f81d183

verified ·

1 Parent(s): 8cafee4

Upload graphcodebert-robust; best F1 score of 0.54 at robust checkpoint 200

Browse files

Files changed (38) hide show

graphcodebert-robust/checkpoint-1000/config.json +28 -0
graphcodebert-robust/checkpoint-1000/merges.txt +0 -0
graphcodebert-robust/checkpoint-1000/model.safetensors +3 -0
graphcodebert-robust/checkpoint-1000/optimizer.pt +3 -0
graphcodebert-robust/checkpoint-1000/rng_state.pth +3 -0
graphcodebert-robust/checkpoint-1000/scaler.pt +3 -0
graphcodebert-robust/checkpoint-1000/scheduler.pt +3 -0
graphcodebert-robust/checkpoint-1000/special_tokens_map.json +51 -0
graphcodebert-robust/checkpoint-1000/tokenizer.json +0 -0
graphcodebert-robust/checkpoint-1000/tokenizer_config.json +58 -0
graphcodebert-robust/checkpoint-1000/trainer_state.json +753 -0
graphcodebert-robust/checkpoint-1000/training_args.bin +3 -0
graphcodebert-robust/checkpoint-1000/vocab.json +0 -0
graphcodebert-robust/checkpoint-400/model.safetensors +1 -1
graphcodebert-robust/checkpoint-400/optimizer.pt +1 -1
graphcodebert-robust/checkpoint-400/rng_state.pth +2 -2
graphcodebert-robust/checkpoint-400/scaler.pt +1 -1
graphcodebert-robust/checkpoint-400/scheduler.pt +1 -1
graphcodebert-robust/checkpoint-400/tokenizer.json +1 -6
graphcodebert-robust/checkpoint-400/trainer_state.json +63 -63
graphcodebert-robust/checkpoint-400/training_args.bin +1 -1
graphcodebert-robust/checkpoint-600/model.safetensors +1 -1
graphcodebert-robust/checkpoint-600/optimizer.pt +1 -1
graphcodebert-robust/checkpoint-600/rng_state.pth +2 -2
graphcodebert-robust/checkpoint-600/scaler.pt +1 -1
graphcodebert-robust/checkpoint-600/scheduler.pt +1 -1
graphcodebert-robust/checkpoint-600/tokenizer.json +1 -6
graphcodebert-robust/checkpoint-600/trainer_state.json +123 -123
graphcodebert-robust/checkpoint-600/training_args.bin +1 -1
graphcodebert-robust/checkpoint-800/model.safetensors +1 -1
graphcodebert-robust/checkpoint-800/optimizer.pt +1 -1
graphcodebert-robust/checkpoint-800/rng_state.pth +2 -2
graphcodebert-robust/checkpoint-800/scaler.pt +1 -1
graphcodebert-robust/checkpoint-800/scheduler.pt +1 -1
graphcodebert-robust/checkpoint-800/tokenizer.json +1 -6
graphcodebert-robust/checkpoint-800/trainer_state.json +183 -183
graphcodebert-robust/checkpoint-800/training_args.bin +1 -1
graphcodebert-robust/training.log +16 -16

graphcodebert-robust/checkpoint-1000/config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "architectures": [
+    "RobertaForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.2,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "dtype": "float32",
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.2,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "transformers_version": "4.56.0",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 50265
+}

graphcodebert-robust/checkpoint-1000/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

graphcodebert-robust/checkpoint-1000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1775ae2be975414f5d11bf15988d0aec2d616f5a0808295653aacc889c6cfc71
+size 498612824

graphcodebert-robust/checkpoint-1000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7e2bac1ac68b08957d3cdb3fda4647a858725c580b3d284372c167bdacd54196
+size 4741923

graphcodebert-robust/checkpoint-1000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a0f22ce42d575ecb5c503c0a6f1ea1c31f0d2f31df8668facc18e860c7d106ec
+size 14581

graphcodebert-robust/checkpoint-1000/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:124625e167eb28acbfc793cfcb3e8a08b32e7fea06501462bc9e420a5e1beb2a
+size 1383

graphcodebert-robust/checkpoint-1000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bf09df71d295b44fbffcc7f812f5e2732486c17d57994a3f0f366c7c7a6b5b97
+size 1465

graphcodebert-robust/checkpoint-1000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

graphcodebert-robust/checkpoint-1000/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

graphcodebert-robust/checkpoint-1000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50264": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "RobertaTokenizer",
+  "trim_offsets": true,
+  "unk_token": "<unk>"
+}

graphcodebert-robust/checkpoint-1000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,753 @@

+{
+  "best_global_step": 1000,
+  "best_metric": 0.7549859375827388,
+  "best_model_checkpoint": "./output_checkpoints/graphcodebert-robust/checkpoint-1000",
+  "epoch": 0.064,
+  "eval_steps": 1000,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.00064,
+      "grad_norm": 1.6144306659698486,
+      "learning_rate": 1.1520000000000002e-08,
+      "loss": 0.729,
+      "step": 10
+    },
+    {
+      "epoch": 0.00128,
+      "grad_norm": 2.0952296257019043,
+      "learning_rate": 2.4320000000000002e-08,
+      "loss": 0.7295,
+      "step": 20
+    },
+    {
+      "epoch": 0.00192,
+      "grad_norm": 1.3587689399719238,
+      "learning_rate": 3.7120000000000004e-08,
+      "loss": 0.73,
+      "step": 30
+    },
+    {
+      "epoch": 0.00256,
+      "grad_norm": 1.2531732320785522,
+      "learning_rate": 4.9920000000000006e-08,
+      "loss": 0.7221,
+      "step": 40
+    },
+    {
+      "epoch": 0.0032,
+      "grad_norm": 1.437932014465332,
+      "learning_rate": 6.272000000000001e-08,
+      "loss": 0.7209,
+      "step": 50
+    },
+    {
+      "epoch": 0.00384,
+      "grad_norm": 1.418426752090454,
+      "learning_rate": 7.552e-08,
+      "loss": 0.729,
+      "step": 60
+    },
+    {
+      "epoch": 0.00448,
+      "grad_norm": 1.9476298093795776,
+      "learning_rate": 8.832e-08,
+      "loss": 0.7242,
+      "step": 70
+    },
+    {
+      "epoch": 0.00512,
+      "grad_norm": 1.7948051691055298,
+      "learning_rate": 1.0112000000000001e-07,
+      "loss": 0.7227,
+      "step": 80
+    },
+    {
+      "epoch": 0.00576,
+      "grad_norm": 1.6534360647201538,
+      "learning_rate": 1.1392e-07,
+      "loss": 0.7234,
+      "step": 90
+    },
+    {
+      "epoch": 0.0064,
+      "grad_norm": 1.0920158624649048,
+      "learning_rate": 1.2672e-07,
+      "loss": 0.7328,
+      "step": 100
+    },
+    {
+      "epoch": 0.00704,
+      "grad_norm": 1.977837085723877,
+      "learning_rate": 1.3952000000000002e-07,
+      "loss": 0.7263,
+      "step": 110
+    },
+    {
+      "epoch": 0.00768,
+      "grad_norm": 1.388983130455017,
+      "learning_rate": 1.5232000000000003e-07,
+      "loss": 0.7286,
+      "step": 120
+    },
+    {
+      "epoch": 0.00832,
+      "grad_norm": 1.2956682443618774,
+      "learning_rate": 1.6512e-07,
+      "loss": 0.7251,
+      "step": 130
+    },
+    {
+      "epoch": 0.00896,
+      "grad_norm": 1.8125052452087402,
+      "learning_rate": 1.7792e-07,
+      "loss": 0.7251,
+      "step": 140
+    },
+    {
+      "epoch": 0.0096,
+      "grad_norm": 1.626846194267273,
+      "learning_rate": 1.9072e-07,
+      "loss": 0.727,
+      "step": 150
+    },
+    {
+      "epoch": 0.01024,
+      "grad_norm": 2.3243086338043213,
+      "learning_rate": 2.0352e-07,
+      "loss": 0.726,
+      "step": 160
+    },
+    {
+      "epoch": 0.01088,
+      "grad_norm": 1.4734737873077393,
+      "learning_rate": 2.1632e-07,
+      "loss": 0.7252,
+      "step": 170
+    },
+    {
+      "epoch": 0.01152,
+      "grad_norm": 2.090498685836792,
+      "learning_rate": 2.2912e-07,
+      "loss": 0.7273,
+      "step": 180
+    },
+    {
+      "epoch": 0.01216,
+      "grad_norm": 1.7563093900680542,
+      "learning_rate": 2.4192000000000004e-07,
+      "loss": 0.719,
+      "step": 190
+    },
+    {
+      "epoch": 0.0128,
+      "grad_norm": 1.449843168258667,
+      "learning_rate": 2.5472000000000005e-07,
+      "loss": 0.7237,
+      "step": 200
+    },
+    {
+      "epoch": 0.01344,
+      "grad_norm": 141396.296875,
+      "learning_rate": 5.350742447516642e-07,
+      "loss": 0.7217,
+      "step": 210
+    },
+    {
+      "epoch": 0.01408,
+      "grad_norm": 102339.1640625,
+      "learning_rate": 5.606758832565284e-07,
+      "loss": 0.7215,
+      "step": 220
+    },
+    {
+      "epoch": 0.01472,
+      "grad_norm": 134052.9375,
+      "learning_rate": 5.862775217613928e-07,
+      "loss": 0.7115,
+      "step": 230
+    },
+    {
+      "epoch": 0.01536,
+      "grad_norm": 87181.984375,
+      "learning_rate": 6.118791602662571e-07,
+      "loss": 0.7241,
+      "step": 240
+    },
+    {
+      "epoch": 0.016,
+      "grad_norm": 100231.328125,
+      "learning_rate": 6.374807987711214e-07,
+      "loss": 0.71,
+      "step": 250
+    },
+    {
+      "epoch": 0.01664,
+      "grad_norm": 136721.484375,
+      "learning_rate": 6.630824372759858e-07,
+      "loss": 0.7188,
+      "step": 260
+    },
+    {
+      "epoch": 0.01728,
+      "grad_norm": 115868.8125,
+      "learning_rate": 6.8868407578085e-07,
+      "loss": 0.7199,
+      "step": 270
+    },
+    {
+      "epoch": 0.01792,
+      "grad_norm": 70205.1484375,
+      "learning_rate": 7.142857142857143e-07,
+      "loss": 0.7299,
+      "step": 280
+    },
+    {
+      "epoch": 0.01856,
+      "grad_norm": 98926.4453125,
+      "learning_rate": 7.398873527905787e-07,
+      "loss": 0.7159,
+      "step": 290
+    },
+    {
+      "epoch": 0.0192,
+      "grad_norm": 134108.140625,
+      "learning_rate": 7.65488991295443e-07,
+      "loss": 0.7122,
+      "step": 300
+    },
+    {
+      "epoch": 0.01984,
+      "grad_norm": 103719.140625,
+      "learning_rate": 7.910906298003073e-07,
+      "loss": 0.7185,
+      "step": 310
+    },
+    {
+      "epoch": 0.02048,
+      "grad_norm": 85624.953125,
+      "learning_rate": 8.166922683051716e-07,
+      "loss": 0.718,
+      "step": 320
+    },
+    {
+      "epoch": 0.02112,
+      "grad_norm": 138824.15625,
+      "learning_rate": 8.422939068100359e-07,
+      "loss": 0.713,
+      "step": 330
+    },
+    {
+      "epoch": 0.02176,
+      "grad_norm": 73629.0859375,
+      "learning_rate": 8.678955453149002e-07,
+      "loss": 0.7186,
+      "step": 340
+    },
+    {
+      "epoch": 0.0224,
+      "grad_norm": 132493.0,
+      "learning_rate": 8.934971838197646e-07,
+      "loss": 0.7133,
+      "step": 350
+    },
+    {
+      "epoch": 0.02304,
+      "grad_norm": 85223.625,
+      "learning_rate": 9.190988223246289e-07,
+      "loss": 0.7124,
+      "step": 360
+    },
+    {
+      "epoch": 0.02368,
+      "grad_norm": 77868.78125,
+      "learning_rate": 9.447004608294931e-07,
+      "loss": 0.7058,
+      "step": 370
+    },
+    {
+      "epoch": 0.02432,
+      "grad_norm": 75874.3046875,
+      "learning_rate": 9.703020993343575e-07,
+      "loss": 0.7139,
+      "step": 380
+    },
+    {
+      "epoch": 0.02496,
+      "grad_norm": 151937.703125,
+      "learning_rate": 9.959037378392218e-07,
+      "loss": 0.713,
+      "step": 390
+    },
+    {
+      "epoch": 0.0256,
+      "grad_norm": 161711.671875,
+      "learning_rate": 1.021505376344086e-06,
+      "loss": 0.7137,
+      "step": 400
+    },
+    {
+      "epoch": 0.02624,
+      "grad_norm": 90800.234375,
+      "learning_rate": 1.0471070148489503e-06,
+      "loss": 0.7091,
+      "step": 410
+    },
+    {
+      "epoch": 0.02688,
+      "grad_norm": 82131.34375,
+      "learning_rate": 1.0727086533538148e-06,
+      "loss": 0.7098,
+      "step": 420
+    },
+    {
+      "epoch": 0.02752,
+      "grad_norm": 92818.9140625,
+      "learning_rate": 1.0983102918586791e-06,
+      "loss": 0.7099,
+      "step": 430
+    },
+    {
+      "epoch": 0.02816,
+      "grad_norm": 88555.5078125,
+      "learning_rate": 1.1239119303635434e-06,
+      "loss": 0.7086,
+      "step": 440
+    },
+    {
+      "epoch": 0.0288,
+      "grad_norm": 73428.6015625,
+      "learning_rate": 1.1495135688684077e-06,
+      "loss": 0.7117,
+      "step": 450
+    },
+    {
+      "epoch": 0.02944,
+      "grad_norm": 128938.7421875,
+      "learning_rate": 1.175115207373272e-06,
+      "loss": 0.7182,
+      "step": 460
+    },
+    {
+      "epoch": 0.03008,
+      "grad_norm": 102742.3359375,
+      "learning_rate": 1.2007168458781362e-06,
+      "loss": 0.7108,
+      "step": 470
+    },
+    {
+      "epoch": 0.03072,
+      "grad_norm": 73825.8125,
+      "learning_rate": 1.2263184843830007e-06,
+      "loss": 0.7087,
+      "step": 480
+    },
+    {
+      "epoch": 0.03136,
+      "grad_norm": 110930.75,
+      "learning_rate": 1.251920122887865e-06,
+      "loss": 0.7232,
+      "step": 490
+    },
+    {
+      "epoch": 0.032,
+      "grad_norm": 95068.84375,
+      "learning_rate": 1.2775217613927293e-06,
+      "loss": 0.703,
+      "step": 500
+    },
+    {
+      "epoch": 0.03264,
+      "grad_norm": 118731.9296875,
+      "learning_rate": 1.3031233998975938e-06,
+      "loss": 0.7063,
+      "step": 510
+    },
+    {
+      "epoch": 0.03328,
+      "grad_norm": 80511.828125,
+      "learning_rate": 1.3287250384024578e-06,
+      "loss": 0.7143,
+      "step": 520
+    },
+    {
+      "epoch": 0.03392,
+      "grad_norm": 84864.484375,
+      "learning_rate": 1.354326676907322e-06,
+      "loss": 0.7055,
+      "step": 530
+    },
+    {
+      "epoch": 0.03456,
+      "grad_norm": 107800.109375,
+      "learning_rate": 1.3799283154121864e-06,
+      "loss": 0.7119,
+      "step": 540
+    },
+    {
+      "epoch": 0.0352,
+      "grad_norm": 83667.671875,
+      "learning_rate": 1.4055299539170509e-06,
+      "loss": 0.7082,
+      "step": 550
+    },
+    {
+      "epoch": 0.03584,
+      "grad_norm": 75656.4140625,
+      "learning_rate": 1.4311315924219151e-06,
+      "loss": 0.7062,
+      "step": 560
+    },
+    {
+      "epoch": 0.03648,
+      "grad_norm": 79985.875,
+      "learning_rate": 1.4567332309267796e-06,
+      "loss": 0.7155,
+      "step": 570
+    },
+    {
+      "epoch": 0.03712,
+      "grad_norm": 76334.078125,
+      "learning_rate": 1.4823348694316437e-06,
+      "loss": 0.7075,
+      "step": 580
+    },
+    {
+      "epoch": 0.03776,
+      "grad_norm": 140764.03125,
+      "learning_rate": 1.507936507936508e-06,
+      "loss": 0.7065,
+      "step": 590
+    },
+    {
+      "epoch": 0.0384,
+      "grad_norm": 100877.296875,
+      "learning_rate": 1.5335381464413722e-06,
+      "loss": 0.7096,
+      "step": 600
+    },
+    {
+      "epoch": 0.03904,
+      "grad_norm": 104088.1171875,
+      "learning_rate": 1.5591397849462367e-06,
+      "loss": 0.6987,
+      "step": 610
+    },
+    {
+      "epoch": 0.03968,
+      "grad_norm": 80806.2265625,
+      "learning_rate": 1.584741423451101e-06,
+      "loss": 0.707,
+      "step": 620
+    },
+    {
+      "epoch": 0.04032,
+      "grad_norm": 109884.765625,
+      "learning_rate": 1.6103430619559655e-06,
+      "loss": 0.6991,
+      "step": 630
+    },
+    {
+      "epoch": 0.04096,
+      "grad_norm": 79944.890625,
+      "learning_rate": 1.6359447004608298e-06,
+      "loss": 0.7047,
+      "step": 640
+    },
+    {
+      "epoch": 0.0416,
+      "grad_norm": 93673.3828125,
+      "learning_rate": 1.6615463389656938e-06,
+      "loss": 0.6971,
+      "step": 650
+    },
+    {
+      "epoch": 0.04224,
+      "grad_norm": 76641.265625,
+      "learning_rate": 1.6871479774705581e-06,
+      "loss": 0.6957,
+      "step": 660
+    },
+    {
+      "epoch": 0.04288,
+      "grad_norm": 73583.5546875,
+      "learning_rate": 1.7127496159754226e-06,
+      "loss": 0.7028,
+      "step": 670
+    },
+    {
+      "epoch": 0.04352,
+      "grad_norm": 75177.9609375,
+      "learning_rate": 1.7383512544802869e-06,
+      "loss": 0.7012,
+      "step": 680
+    },
+    {
+      "epoch": 0.04416,
+      "grad_norm": 78340.8515625,
+      "learning_rate": 1.7639528929851512e-06,
+      "loss": 0.6987,
+      "step": 690
+    },
+    {
+      "epoch": 0.0448,
+      "grad_norm": 86004.1171875,
+      "learning_rate": 1.7895545314900157e-06,
+      "loss": 0.7061,
+      "step": 700
+    },
+    {
+      "epoch": 0.04544,
+      "grad_norm": 94212.0390625,
+      "learning_rate": 1.8151561699948797e-06,
+      "loss": 0.6993,
+      "step": 710
+    },
+    {
+      "epoch": 0.04608,
+      "grad_norm": 83918.2421875,
+      "learning_rate": 1.840757808499744e-06,
+      "loss": 0.7009,
+      "step": 720
+    },
+    {
+      "epoch": 0.04672,
+      "grad_norm": 68374.3125,
+      "learning_rate": 1.8663594470046085e-06,
+      "loss": 0.6964,
+      "step": 730
+    },
+    {
+      "epoch": 0.04736,
+      "grad_norm": 90348.78125,
+      "learning_rate": 1.8919610855094728e-06,
+      "loss": 0.7011,
+      "step": 740
+    },
+    {
+      "epoch": 0.048,
+      "grad_norm": 146658.0,
+      "learning_rate": 1.9175627240143373e-06,
+      "loss": 0.7003,
+      "step": 750
+    },
+    {
+      "epoch": 0.04864,
+      "grad_norm": 112037.1640625,
+      "learning_rate": 1.9431643625192015e-06,
+      "loss": 0.7051,
+      "step": 760
+    },
+    {
+      "epoch": 0.04928,
+      "grad_norm": 70628.625,
+      "learning_rate": 1.9687660010240654e-06,
+      "loss": 0.6923,
+      "step": 770
+    },
+    {
+      "epoch": 0.04992,
+      "grad_norm": 109922.125,
+      "learning_rate": 1.99436763952893e-06,
+      "loss": 0.6893,
+      "step": 780
+    },
+    {
+      "epoch": 0.05056,
+      "grad_norm": 135306.375,
+      "learning_rate": 2.0199692780337944e-06,
+      "loss": 0.7008,
+      "step": 790
+    },
+    {
+      "epoch": 0.0512,
+      "grad_norm": 82354.8046875,
+      "learning_rate": 2.0455709165386586e-06,
+      "loss": 0.705,
+      "step": 800
+    },
+    {
+      "epoch": 0.05184,
+      "grad_norm": 95951.671875,
+      "learning_rate": 2.071172555043523e-06,
+      "loss": 0.6912,
+      "step": 810
+    },
+    {
+      "epoch": 0.05248,
+      "grad_norm": 96797.4609375,
+      "learning_rate": 2.096774193548387e-06,
+      "loss": 0.6922,
+      "step": 820
+    },
+    {
+      "epoch": 0.05312,
+      "grad_norm": 87190.625,
+      "learning_rate": 2.122375832053252e-06,
+      "loss": 0.6946,
+      "step": 830
+    },
+    {
+      "epoch": 0.05376,
+      "grad_norm": 87958.5625,
+      "learning_rate": 2.1479774705581158e-06,
+      "loss": 0.6949,
+      "step": 840
+    },
+    {
+      "epoch": 0.0544,
+      "grad_norm": 77217.1796875,
+      "learning_rate": 2.17357910906298e-06,
+      "loss": 0.6928,
+      "step": 850
+    },
+    {
+      "epoch": 0.05504,
+      "grad_norm": 117156.5546875,
+      "learning_rate": 2.1991807475678443e-06,
+      "loss": 0.692,
+      "step": 860
+    },
+    {
+      "epoch": 0.05568,
+      "grad_norm": 94618.6875,
+      "learning_rate": 2.224782386072709e-06,
+      "loss": 0.6976,
+      "step": 870
+    },
+    {
+      "epoch": 0.05632,
+      "grad_norm": 71444.6484375,
+      "learning_rate": 2.2503840245775733e-06,
+      "loss": 0.6989,
+      "step": 880
+    },
+    {
+      "epoch": 0.05696,
+      "grad_norm": 159991.609375,
+      "learning_rate": 2.2759856630824376e-06,
+      "loss": 0.6928,
+      "step": 890
+    },
+    {
+      "epoch": 0.0576,
+      "grad_norm": 81899.6875,
+      "learning_rate": 2.301587301587302e-06,
+      "loss": 0.691,
+      "step": 900
+    },
+    {
+      "epoch": 0.05824,
+      "grad_norm": 110817.3671875,
+      "learning_rate": 2.327188940092166e-06,
+      "loss": 0.6858,
+      "step": 910
+    },
+    {
+      "epoch": 0.05888,
+      "grad_norm": 105698.109375,
+      "learning_rate": 2.3527905785970304e-06,
+      "loss": 0.6965,
+      "step": 920
+    },
+    {
+      "epoch": 0.05952,
+      "grad_norm": 76475.0,
+      "learning_rate": 2.3783922171018947e-06,
+      "loss": 0.6901,
+      "step": 930
+    },
+    {
+      "epoch": 0.06016,
+      "grad_norm": 96672.6796875,
+      "learning_rate": 2.403993855606759e-06,
+      "loss": 0.6908,
+      "step": 940
+    },
+    {
+      "epoch": 0.0608,
+      "grad_norm": 114510.8125,
+      "learning_rate": 2.4295954941116232e-06,
+      "loss": 0.6904,
+      "step": 950
+    },
+    {
+      "epoch": 0.06144,
+      "grad_norm": 62412.4375,
+      "learning_rate": 2.455197132616488e-06,
+      "loss": 0.6855,
+      "step": 960
+    },
+    {
+      "epoch": 0.06208,
+      "grad_norm": 92860.7109375,
+      "learning_rate": 2.4807987711213518e-06,
+      "loss": 0.6752,
+      "step": 970
+    },
+    {
+      "epoch": 0.06272,
+      "grad_norm": 75184.359375,
+      "learning_rate": 2.506400409626216e-06,
+      "loss": 0.6868,
+      "step": 980
+    },
+    {
+      "epoch": 0.06336,
+      "grad_norm": 77771.1640625,
+      "learning_rate": 2.5320020481310808e-06,
+      "loss": 0.6941,
+      "step": 990
+    },
+    {
+      "epoch": 0.064,
+      "grad_norm": 65366.796875,
+      "learning_rate": 2.557603686635945e-06,
+      "loss": 0.6808,
+      "step": 1000
+    },
+    {
+      "epoch": 0.064,
+      "eval_accuracy": 0.75744,
+      "eval_loss": 0.6539617776870728,
+      "eval_macro_f1": 0.7549859375827388,
+      "eval_runtime": 1576.6702,
+      "eval_samples_per_second": 63.425,
+      "eval_steps_per_second": 0.496,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 78125,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 200,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 8417004883171200.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}

graphcodebert-robust/checkpoint-1000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b5fc4023adc281644437a690ea6a6001846d7762699cd428d4ad38e1888076db
+size 5841

graphcodebert-robust/checkpoint-1000/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

graphcodebert-robust/checkpoint-400/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e4e748b25483175160cea7725c3f8f0878d2cab69bc662e854fb2f2191256cd
 size 498612824

 version https://git-lfs.github.com/spec/v1
+oid sha256:7a01766ea37053c4e1086db23a592ccd390b6f66d530273ae2dae69fbf9aa39e
 size 498612824

graphcodebert-robust/checkpoint-400/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5f90982cf6a84b33871eaabef13bee4999efc870036a33418fc702291783e6ef
 size 4741923

 version https://git-lfs.github.com/spec/v1
+oid sha256:3993e14f8e5395da15ce3350b7a6c24a8b0c21921fd8cce7a29d5175f071b2fc
 size 4741923

graphcodebert-robust/checkpoint-400/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db236f43af2a1e18f8bd14b48ca1899a08ea03909f6a24acd7f544ce3ee66296
-size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:2ab60503702bb1354c5765d2c7d1ba9f47491e07ac8864941c7126246dccd968
+size 14581

graphcodebert-robust/checkpoint-400/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:30858f23bcb22d0baef45bd4add9d6fa474141308c12653c706077b87d932e49
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:124625e167eb28acbfc793cfcb3e8a08b32e7fea06501462bc9e420a5e1beb2a
 size 1383

graphcodebert-robust/checkpoint-400/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:69fd2a2128ae1d3fdad80684195acce5b8c4cb843627b5e9241d19cba08e96e4
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:99753ecc9725cb463a1acc03fa95671b59d366ed45a71854383d0a8e379a982d
 size 1465

graphcodebert-robust/checkpoint-400/tokenizer.json CHANGED Viewed

@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 512,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
   "padding": null,
   "added_tokens": [
     {

 {
   "version": "1.0",
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {

graphcodebert-robust/checkpoint-400/trainer_state.json CHANGED Viewed

@@ -151,149 +151,149 @@
     },
     {
       "epoch": 0.01344,
-      "grad_norm": 2.1326472759246826,
-      "learning_rate": 2.6752000000000006e-07,
-      "loss": 0.7305,
       "step": 210
     },
     {
       "epoch": 0.01408,
-      "grad_norm": 2.21703839302063,
-      "learning_rate": 2.8032e-07,
-      "loss": 0.7167,
       "step": 220
     },
     {
       "epoch": 0.01472,
-      "grad_norm": 1.6385700702667236,
-      "learning_rate": 2.9312e-07,
-      "loss": 0.7209,
       "step": 230
     },
     {
       "epoch": 0.01536,
-      "grad_norm": 1.4293471574783325,
-      "learning_rate": 3.0592000000000003e-07,
-      "loss": 0.722,
       "step": 240
     },
     {
       "epoch": 0.016,
-      "grad_norm": 2.1437904834747314,
-      "learning_rate": 3.1872e-07,
-      "loss": 0.717,
       "step": 250
     },
     {
       "epoch": 0.01664,
-      "grad_norm": 2.014806032180786,
-      "learning_rate": 3.3152000000000005e-07,
-      "loss": 0.7182,
       "step": 260
     },
     {
       "epoch": 0.01728,
-      "grad_norm": 1.7216386795043945,
-      "learning_rate": 3.4432e-07,
-      "loss": 0.7253,
       "step": 270
     },
     {
       "epoch": 0.01792,
-      "grad_norm": 1.4267009496688843,
-      "learning_rate": 3.5712e-07,
-      "loss": 0.7189,
       "step": 280
     },
     {
       "epoch": 0.01856,
-      "grad_norm": 2.222503185272217,
-      "learning_rate": 3.6992e-07,
-      "loss": 0.7198,
       "step": 290
     },
     {
       "epoch": 0.0192,
-      "grad_norm": 1.578922986984253,
-      "learning_rate": 3.8272000000000003e-07,
-      "loss": 0.717,
       "step": 300
     },
     {
       "epoch": 0.01984,
-      "grad_norm": 1.719905972480774,
-      "learning_rate": 3.9552e-07,
-      "loss": 0.709,
       "step": 310
     },
     {
       "epoch": 0.02048,
-      "grad_norm": 1.4473963975906372,
-      "learning_rate": 4.0832000000000005e-07,
-      "loss": 0.7215,
       "step": 320
     },
     {
       "epoch": 0.02112,
-      "grad_norm": 2.1639790534973145,
-      "learning_rate": 4.2112e-07,
-      "loss": 0.7175,
       "step": 330
     },
     {
       "epoch": 0.02176,
-      "grad_norm": 1.2387958765029907,
-      "learning_rate": 4.3392e-07,
-      "loss": 0.7129,
       "step": 340
     },
     {
       "epoch": 0.0224,
-      "grad_norm": 2.2797842025756836,
-      "learning_rate": 4.4672000000000007e-07,
-      "loss": 0.7159,
       "step": 350
     },
     {
       "epoch": 0.02304,
-      "grad_norm": 1.5692473649978638,
-      "learning_rate": 4.5952000000000003e-07,
-      "loss": 0.7161,
       "step": 360
     },
     {
       "epoch": 0.02368,
-      "grad_norm": 1.4270817041397095,
-      "learning_rate": 4.723200000000001e-07,
-      "loss": 0.7114,
       "step": 370
     },
     {
       "epoch": 0.02432,
-      "grad_norm": 1.4091335535049438,
-      "learning_rate": 4.8512e-07,
-      "loss": 0.7127,
       "step": 380
     },
     {
       "epoch": 0.02496,
-      "grad_norm": 1.8862844705581665,
-      "learning_rate": 4.979200000000001e-07,
-      "loss": 0.7153,
       "step": 390
     },
     {
       "epoch": 0.0256,
-      "grad_norm": 1.9264376163482666,
-      "learning_rate": 5.107200000000001e-07,
-      "loss": 0.7109,
       "step": 400
     }
   ],
   "logging_steps": 10,
-  "max_steps": 156250,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 10,
   "save_steps": 200,
   "stateful_callbacks": {
     "EarlyStoppingCallback": {
@@ -316,7 +316,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3367821508608000.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

     },
     {
       "epoch": 0.01344,
+      "grad_norm": 141396.296875,
+      "learning_rate": 5.350742447516642e-07,
+      "loss": 0.7217,
       "step": 210
     },
     {
       "epoch": 0.01408,
+      "grad_norm": 102339.1640625,
+      "learning_rate": 5.606758832565284e-07,
+      "loss": 0.7215,
       "step": 220
     },
     {
       "epoch": 0.01472,
+      "grad_norm": 134052.9375,
+      "learning_rate": 5.862775217613928e-07,
+      "loss": 0.7115,
       "step": 230
     },
     {
       "epoch": 0.01536,
+      "grad_norm": 87181.984375,
+      "learning_rate": 6.118791602662571e-07,
+      "loss": 0.7241,
       "step": 240
     },
     {
       "epoch": 0.016,
+      "grad_norm": 100231.328125,
+      "learning_rate": 6.374807987711214e-07,
+      "loss": 0.71,
       "step": 250
     },
     {
       "epoch": 0.01664,
+      "grad_norm": 136721.484375,
+      "learning_rate": 6.630824372759858e-07,
+      "loss": 0.7188,
       "step": 260
     },
     {
       "epoch": 0.01728,
+      "grad_norm": 115868.8125,
+      "learning_rate": 6.8868407578085e-07,
+      "loss": 0.7199,
       "step": 270
     },
     {
       "epoch": 0.01792,
+      "grad_norm": 70205.1484375,
+      "learning_rate": 7.142857142857143e-07,
+      "loss": 0.7299,
       "step": 280
     },
     {
       "epoch": 0.01856,
+      "grad_norm": 98926.4453125,
+      "learning_rate": 7.398873527905787e-07,
+      "loss": 0.7159,
       "step": 290
     },
     {
       "epoch": 0.0192,
+      "grad_norm": 134108.140625,
+      "learning_rate": 7.65488991295443e-07,
+      "loss": 0.7122,
       "step": 300
     },
     {
       "epoch": 0.01984,
+      "grad_norm": 103719.140625,
+      "learning_rate": 7.910906298003073e-07,
+      "loss": 0.7185,
       "step": 310
     },
     {
       "epoch": 0.02048,
+      "grad_norm": 85624.953125,
+      "learning_rate": 8.166922683051716e-07,
+      "loss": 0.718,
       "step": 320
     },
     {
       "epoch": 0.02112,
+      "grad_norm": 138824.15625,
+      "learning_rate": 8.422939068100359e-07,
+      "loss": 0.713,
       "step": 330
     },
     {
       "epoch": 0.02176,
+      "grad_norm": 73629.0859375,
+      "learning_rate": 8.678955453149002e-07,
+      "loss": 0.7186,
       "step": 340
     },
     {
       "epoch": 0.0224,
+      "grad_norm": 132493.0,
+      "learning_rate": 8.934971838197646e-07,
+      "loss": 0.7133,
       "step": 350
     },
     {
       "epoch": 0.02304,
+      "grad_norm": 85223.625,
+      "learning_rate": 9.190988223246289e-07,
+      "loss": 0.7124,
       "step": 360
     },
     {
       "epoch": 0.02368,
+      "grad_norm": 77868.78125,
+      "learning_rate": 9.447004608294931e-07,
+      "loss": 0.7058,
       "step": 370
     },
     {
       "epoch": 0.02432,
+      "grad_norm": 75874.3046875,
+      "learning_rate": 9.703020993343575e-07,
+      "loss": 0.7139,
       "step": 380
     },
     {
       "epoch": 0.02496,
+      "grad_norm": 151937.703125,
+      "learning_rate": 9.959037378392218e-07,
+      "loss": 0.713,
       "step": 390
     },
     {
       "epoch": 0.0256,
+      "grad_norm": 161711.671875,
+      "learning_rate": 1.021505376344086e-06,
+      "loss": 0.7137,
       "step": 400
     }
   ],
   "logging_steps": 10,
+  "max_steps": 78125,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
   "save_steps": 200,
   "stateful_callbacks": {
     "EarlyStoppingCallback": {
       "attributes": {}
     }
   },
+  "total_flos": 3367295286497280.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

graphcodebert-robust/checkpoint-400/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9ff659b85d84ec0bae53596bc271ba773db9c463626db0f13fd8e747f433dad4
 size 5841

 version https://git-lfs.github.com/spec/v1
+oid sha256:b5fc4023adc281644437a690ea6a6001846d7762699cd428d4ad38e1888076db
 size 5841

graphcodebert-robust/checkpoint-600/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:33d1e5063213f214a9f1effa3c1d7fdca40af6b0941bb37ae0f1a6239c90b3c4
 size 498612824

 version https://git-lfs.github.com/spec/v1
+oid sha256:75ec427b92df30abfd117ca61bf8855a95bff5b8e2f300c83f23131aa83f89a3
 size 498612824

graphcodebert-robust/checkpoint-600/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:787767f32f92db97a85e79ae7369b941462a2aa040ad04230091e634625d1bd5
 size 4741923

 version https://git-lfs.github.com/spec/v1
+oid sha256:e6eaf9c7a3d50e76cca47c4da094a2db7ca99a2b289f3509dc98882e9debad13
 size 4741923

graphcodebert-robust/checkpoint-600/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae074b64b04f15c65bed20fbc593949760914672b525152439949dbdeac14c41
-size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:820bebfae8bbd9452955c53efeeb042e6227f4bb5c733fac637c835bd717c752
+size 14581

graphcodebert-robust/checkpoint-600/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb7fde5111803012042c93a73aa191336bb6e10b3ad44f6bd1d94fc7008a22b6
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:124625e167eb28acbfc793cfcb3e8a08b32e7fea06501462bc9e420a5e1beb2a
 size 1383

graphcodebert-robust/checkpoint-600/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae70cc056ae3330cc58f33660559174defa991e45f91baa83a3ceffabb8b19fd
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:abc0eb96c2d3f04dd37bcd945b0c2a2b0de8956916d0c07353bb361443cea60c
 size 1465

graphcodebert-robust/checkpoint-600/tokenizer.json CHANGED Viewed

@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 512,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
   "padding": null,
   "added_tokens": [
     {

 {
   "version": "1.0",
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {

graphcodebert-robust/checkpoint-600/trainer_state.json CHANGED Viewed

@@ -151,289 +151,289 @@
     },
     {
       "epoch": 0.01344,
-      "grad_norm": 2.1326472759246826,
-      "learning_rate": 2.6752000000000006e-07,
-      "loss": 0.7305,
       "step": 210
     },
     {
       "epoch": 0.01408,
-      "grad_norm": 2.21703839302063,
-      "learning_rate": 2.8032e-07,
-      "loss": 0.7167,
       "step": 220
     },
     {
       "epoch": 0.01472,
-      "grad_norm": 1.6385700702667236,
-      "learning_rate": 2.9312e-07,
-      "loss": 0.7209,
       "step": 230
     },
     {
       "epoch": 0.01536,
-      "grad_norm": 1.4293471574783325,
-      "learning_rate": 3.0592000000000003e-07,
-      "loss": 0.722,
       "step": 240
     },
     {
       "epoch": 0.016,
-      "grad_norm": 2.1437904834747314,
-      "learning_rate": 3.1872e-07,
-      "loss": 0.717,
       "step": 250
     },
     {
       "epoch": 0.01664,
-      "grad_norm": 2.014806032180786,
-      "learning_rate": 3.3152000000000005e-07,
-      "loss": 0.7182,
       "step": 260
     },
     {
       "epoch": 0.01728,
-      "grad_norm": 1.7216386795043945,
-      "learning_rate": 3.4432e-07,
-      "loss": 0.7253,
       "step": 270
     },
     {
       "epoch": 0.01792,
-      "grad_norm": 1.4267009496688843,
-      "learning_rate": 3.5712e-07,
-      "loss": 0.7189,
       "step": 280
     },
     {
       "epoch": 0.01856,
-      "grad_norm": 2.222503185272217,
-      "learning_rate": 3.6992e-07,
-      "loss": 0.7198,
       "step": 290
     },
     {
       "epoch": 0.0192,
-      "grad_norm": 1.578922986984253,
-      "learning_rate": 3.8272000000000003e-07,
-      "loss": 0.717,
       "step": 300
     },
     {
       "epoch": 0.01984,
-      "grad_norm": 1.719905972480774,
-      "learning_rate": 3.9552e-07,
-      "loss": 0.709,
       "step": 310
     },
     {
       "epoch": 0.02048,
-      "grad_norm": 1.4473963975906372,
-      "learning_rate": 4.0832000000000005e-07,
-      "loss": 0.7215,
       "step": 320
     },
     {
       "epoch": 0.02112,
-      "grad_norm": 2.1639790534973145,
-      "learning_rate": 4.2112e-07,
-      "loss": 0.7175,
       "step": 330
     },
     {
       "epoch": 0.02176,
-      "grad_norm": 1.2387958765029907,
-      "learning_rate": 4.3392e-07,
-      "loss": 0.7129,
       "step": 340
     },
     {
       "epoch": 0.0224,
-      "grad_norm": 2.2797842025756836,
-      "learning_rate": 4.4672000000000007e-07,
-      "loss": 0.7159,
       "step": 350
     },
     {
       "epoch": 0.02304,
-      "grad_norm": 1.5692473649978638,
-      "learning_rate": 4.5952000000000003e-07,
-      "loss": 0.7161,
       "step": 360
     },
     {
       "epoch": 0.02368,
-      "grad_norm": 1.4270817041397095,
-      "learning_rate": 4.723200000000001e-07,
-      "loss": 0.7114,
       "step": 370
     },
     {
       "epoch": 0.02432,
-      "grad_norm": 1.4091335535049438,
-      "learning_rate": 4.8512e-07,
-      "loss": 0.7127,
       "step": 380
     },
     {
       "epoch": 0.02496,
-      "grad_norm": 1.8862844705581665,
-      "learning_rate": 4.979200000000001e-07,
-      "loss": 0.7153,
       "step": 390
     },
     {
       "epoch": 0.0256,
-      "grad_norm": 1.9264376163482666,
-      "learning_rate": 5.107200000000001e-07,
-      "loss": 0.7109,
       "step": 400
     },
     {
       "epoch": 0.02624,
-      "grad_norm": 1.4058727025985718,
-      "learning_rate": 5.235200000000001e-07,
-      "loss": 0.705,
       "step": 410
     },
     {
       "epoch": 0.02688,
-      "grad_norm": 1.519445776939392,
-      "learning_rate": 5.363200000000001e-07,
-      "loss": 0.7131,
       "step": 420
     },
     {
       "epoch": 0.02752,
-      "grad_norm": 1.6636698246002197,
-      "learning_rate": 5.491200000000001e-07,
-      "loss": 0.6916,
       "step": 430
     },
     {
       "epoch": 0.02816,
-      "grad_norm": 1.5472590923309326,
-      "learning_rate": 5.6192e-07,
-      "loss": 0.705,
       "step": 440
     },
     {
       "epoch": 0.0288,
-      "grad_norm": 1.4896206855773926,
-      "learning_rate": 5.747200000000001e-07,
-      "loss": 0.7046,
       "step": 450
     },
     {
       "epoch": 0.02944,
-      "grad_norm": 2.2565503120422363,
-      "learning_rate": 5.8752e-07,
-      "loss": 0.7009,
       "step": 460
     },
     {
       "epoch": 0.03008,
-      "grad_norm": 2.017638683319092,
-      "learning_rate": 6.0032e-07,
-      "loss": 0.7058,
       "step": 470
     },
     {
       "epoch": 0.03072,
-      "grad_norm": 1.3399696350097656,
-      "learning_rate": 6.1312e-07,
-      "loss": 0.7003,
       "step": 480
     },
     {
       "epoch": 0.03136,
-      "grad_norm": 1.3090866804122925,
-      "learning_rate": 6.2592e-07,
-      "loss": 0.7067,
       "step": 490
     },
     {
       "epoch": 0.032,
-      "grad_norm": 1.4199142456054688,
-      "learning_rate": 6.3872e-07,
-      "loss": 0.7008,
       "step": 500
     },
     {
       "epoch": 0.03264,
-      "grad_norm": 1.7174904346466064,
-      "learning_rate": 6.515200000000001e-07,
-      "loss": 0.7003,
       "step": 510
     },
     {
       "epoch": 0.03328,
-      "grad_norm": 1.2983943223953247,
-      "learning_rate": 6.643200000000001e-07,
-      "loss": 0.698,
       "step": 520
     },
     {
       "epoch": 0.03392,
-      "grad_norm": 1.8224154710769653,
-      "learning_rate": 6.7712e-07,
-      "loss": 0.7047,
       "step": 530
     },
     {
       "epoch": 0.03456,
-      "grad_norm": 1.3605278730392456,
-      "learning_rate": 6.899200000000001e-07,
-      "loss": 0.6974,
       "step": 540
     },
     {
       "epoch": 0.0352,
-      "grad_norm": 1.4932376146316528,
-      "learning_rate": 7.027200000000001e-07,
-      "loss": 0.6918,
       "step": 550
     },
     {
       "epoch": 0.03584,
-      "grad_norm": 1.2169368267059326,
-      "learning_rate": 7.155200000000001e-07,
-      "loss": 0.6996,
       "step": 560
     },
     {
       "epoch": 0.03648,
-      "grad_norm": 1.5690464973449707,
-      "learning_rate": 7.2832e-07,
-      "loss": 0.6942,
       "step": 570
     },
     {
       "epoch": 0.03712,
-      "grad_norm": 1.541991949081421,
-      "learning_rate": 7.4112e-07,
-      "loss": 0.6973,
       "step": 580
     },
     {
       "epoch": 0.03776,
-      "grad_norm": 1.7749661207199097,
-      "learning_rate": 7.5392e-07,
-      "loss": 0.6865,
       "step": 590
     },
     {
       "epoch": 0.0384,
-      "grad_norm": 1.2169281244277954,
-      "learning_rate": 7.667200000000001e-07,
-      "loss": 0.6876,
       "step": 600
     }
   ],
   "logging_steps": 10,
-  "max_steps": 156250,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 10,
   "save_steps": 200,
   "stateful_callbacks": {
     "EarlyStoppingCallback": {
@@ -456,7 +456,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5051732262912000.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

     },
     {
       "epoch": 0.01344,
+      "grad_norm": 141396.296875,
+      "learning_rate": 5.350742447516642e-07,
+      "loss": 0.7217,
       "step": 210
     },
     {
       "epoch": 0.01408,
+      "grad_norm": 102339.1640625,
+      "learning_rate": 5.606758832565284e-07,
+      "loss": 0.7215,
       "step": 220
     },
     {
       "epoch": 0.01472,
+      "grad_norm": 134052.9375,
+      "learning_rate": 5.862775217613928e-07,
+      "loss": 0.7115,
       "step": 230
     },
     {
       "epoch": 0.01536,
+      "grad_norm": 87181.984375,
+      "learning_rate": 6.118791602662571e-07,
+      "loss": 0.7241,
       "step": 240
     },
     {
       "epoch": 0.016,
+      "grad_norm": 100231.328125,
+      "learning_rate": 6.374807987711214e-07,
+      "loss": 0.71,
       "step": 250
     },
     {
       "epoch": 0.01664,
+      "grad_norm": 136721.484375,
+      "learning_rate": 6.630824372759858e-07,
+      "loss": 0.7188,
       "step": 260
     },
     {
       "epoch": 0.01728,
+      "grad_norm": 115868.8125,
+      "learning_rate": 6.8868407578085e-07,
+      "loss": 0.7199,
       "step": 270
     },
     {
       "epoch": 0.01792,
+      "grad_norm": 70205.1484375,
+      "learning_rate": 7.142857142857143e-07,
+      "loss": 0.7299,
       "step": 280
     },
     {
       "epoch": 0.01856,
+      "grad_norm": 98926.4453125,
+      "learning_rate": 7.398873527905787e-07,
+      "loss": 0.7159,
       "step": 290
     },
     {
       "epoch": 0.0192,
+      "grad_norm": 134108.140625,
+      "learning_rate": 7.65488991295443e-07,
+      "loss": 0.7122,
       "step": 300
     },
     {
       "epoch": 0.01984,
+      "grad_norm": 103719.140625,
+      "learning_rate": 7.910906298003073e-07,
+      "loss": 0.7185,
       "step": 310
     },
     {
       "epoch": 0.02048,
+      "grad_norm": 85624.953125,
+      "learning_rate": 8.166922683051716e-07,
+      "loss": 0.718,
       "step": 320
     },
     {
       "epoch": 0.02112,
+      "grad_norm": 138824.15625,
+      "learning_rate": 8.422939068100359e-07,
+      "loss": 0.713,
       "step": 330
     },
     {
       "epoch": 0.02176,
+      "grad_norm": 73629.0859375,
+      "learning_rate": 8.678955453149002e-07,
+      "loss": 0.7186,
       "step": 340
     },
     {
       "epoch": 0.0224,
+      "grad_norm": 132493.0,
+      "learning_rate": 8.934971838197646e-07,
+      "loss": 0.7133,
       "step": 350
     },
     {
       "epoch": 0.02304,
+      "grad_norm": 85223.625,
+      "learning_rate": 9.190988223246289e-07,
+      "loss": 0.7124,
       "step": 360
     },
     {
       "epoch": 0.02368,
+      "grad_norm": 77868.78125,
+      "learning_rate": 9.447004608294931e-07,
+      "loss": 0.7058,
       "step": 370
     },
     {
       "epoch": 0.02432,
+      "grad_norm": 75874.3046875,
+      "learning_rate": 9.703020993343575e-07,
+      "loss": 0.7139,
       "step": 380
     },
     {
       "epoch": 0.02496,
+      "grad_norm": 151937.703125,
+      "learning_rate": 9.959037378392218e-07,
+      "loss": 0.713,
       "step": 390
     },
     {
       "epoch": 0.0256,
+      "grad_norm": 161711.671875,
+      "learning_rate": 1.021505376344086e-06,
+      "loss": 0.7137,
       "step": 400
     },
     {
       "epoch": 0.02624,
+      "grad_norm": 90800.234375,
+      "learning_rate": 1.0471070148489503e-06,
+      "loss": 0.7091,
       "step": 410
     },
     {
       "epoch": 0.02688,
+      "grad_norm": 82131.34375,
+      "learning_rate": 1.0727086533538148e-06,
+      "loss": 0.7098,
       "step": 420
     },
     {
       "epoch": 0.02752,
+      "grad_norm": 92818.9140625,
+      "learning_rate": 1.0983102918586791e-06,
+      "loss": 0.7099,
       "step": 430
     },
     {
       "epoch": 0.02816,
+      "grad_norm": 88555.5078125,
+      "learning_rate": 1.1239119303635434e-06,
+      "loss": 0.7086,
       "step": 440
     },
     {
       "epoch": 0.0288,
+      "grad_norm": 73428.6015625,
+      "learning_rate": 1.1495135688684077e-06,
+      "loss": 0.7117,
       "step": 450
     },
     {
       "epoch": 0.02944,
+      "grad_norm": 128938.7421875,
+      "learning_rate": 1.175115207373272e-06,
+      "loss": 0.7182,
       "step": 460
     },
     {
       "epoch": 0.03008,
+      "grad_norm": 102742.3359375,
+      "learning_rate": 1.2007168458781362e-06,
+      "loss": 0.7108,
       "step": 470
     },
     {
       "epoch": 0.03072,
+      "grad_norm": 73825.8125,
+      "learning_rate": 1.2263184843830007e-06,
+      "loss": 0.7087,
       "step": 480
     },
     {
       "epoch": 0.03136,
+      "grad_norm": 110930.75,
+      "learning_rate": 1.251920122887865e-06,
+      "loss": 0.7232,
       "step": 490
     },
     {
       "epoch": 0.032,
+      "grad_norm": 95068.84375,
+      "learning_rate": 1.2775217613927293e-06,
+      "loss": 0.703,
       "step": 500
     },
     {
       "epoch": 0.03264,
+      "grad_norm": 118731.9296875,
+      "learning_rate": 1.3031233998975938e-06,
+      "loss": 0.7063,
       "step": 510
     },
     {
       "epoch": 0.03328,
+      "grad_norm": 80511.828125,
+      "learning_rate": 1.3287250384024578e-06,
+      "loss": 0.7143,
       "step": 520
     },
     {
       "epoch": 0.03392,
+      "grad_norm": 84864.484375,
+      "learning_rate": 1.354326676907322e-06,
+      "loss": 0.7055,
       "step": 530
     },
     {
       "epoch": 0.03456,
+      "grad_norm": 107800.109375,
+      "learning_rate": 1.3799283154121864e-06,
+      "loss": 0.7119,
       "step": 540
     },
     {
       "epoch": 0.0352,
+      "grad_norm": 83667.671875,
+      "learning_rate": 1.4055299539170509e-06,
+      "loss": 0.7082,
       "step": 550
     },
     {
       "epoch": 0.03584,
+      "grad_norm": 75656.4140625,
+      "learning_rate": 1.4311315924219151e-06,
+      "loss": 0.7062,
       "step": 560
     },
     {
       "epoch": 0.03648,
+      "grad_norm": 79985.875,
+      "learning_rate": 1.4567332309267796e-06,
+      "loss": 0.7155,
       "step": 570
     },
     {
       "epoch": 0.03712,
+      "grad_norm": 76334.078125,
+      "learning_rate": 1.4823348694316437e-06,
+      "loss": 0.7075,
       "step": 580
     },
     {
       "epoch": 0.03776,
+      "grad_norm": 140764.03125,
+      "learning_rate": 1.507936507936508e-06,
+      "loss": 0.7065,
       "step": 590
     },
     {
       "epoch": 0.0384,
+      "grad_norm": 100877.296875,
+      "learning_rate": 1.5335381464413722e-06,
+      "loss": 0.7096,
       "step": 600
     }
   ],
   "logging_steps": 10,
+  "max_steps": 78125,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
   "save_steps": 200,
   "stateful_callbacks": {
     "EarlyStoppingCallback": {
       "attributes": {}
     }
   },
+  "total_flos": 5049397152295680.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

graphcodebert-robust/checkpoint-600/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9ff659b85d84ec0bae53596bc271ba773db9c463626db0f13fd8e747f433dad4
 size 5841

 version https://git-lfs.github.com/spec/v1
+oid sha256:b5fc4023adc281644437a690ea6a6001846d7762699cd428d4ad38e1888076db
 size 5841

graphcodebert-robust/checkpoint-800/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8fe33dfcd47347ccd7588a3d6ffc124e8af6931e6cc5edf7eff42f416eb814e9
 size 498612824

 version https://git-lfs.github.com/spec/v1
+oid sha256:d19fdc7a5fa21c91052f15414ec14e1da4bbc85f75aa66510c1c463b2f14e2f6
 size 498612824

graphcodebert-robust/checkpoint-800/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:33bc19163178e07929f74c9874b8faa2235856319b19a9f384fc4e2fcd84fe4c
 size 4741923

 version https://git-lfs.github.com/spec/v1
+oid sha256:ddec1d294331a984f4091595913e06b171ba550334d359ca9c07a294409ad9c1
 size 4741923

graphcodebert-robust/checkpoint-800/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:08e89a90a52ab59c17ebc709062022104092de74a5a21eaffb6603a247770d61
-size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:36c90ae3575630687b6a7d64bf93dded50adb1dbab4b74db0c9cdd2945f93577
+size 14581

graphcodebert-robust/checkpoint-800/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6fef122931c86c2d2736773be787da21ac6460d41580735381e953556fb410be
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:124625e167eb28acbfc793cfcb3e8a08b32e7fea06501462bc9e420a5e1beb2a
 size 1383

graphcodebert-robust/checkpoint-800/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6c662a8bb63968a394648b28695827df4fdf4db740c41d1df9edbc67160fd052
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:63a6e34118894da77328dc4487914a7b9b9dbb71f404e8060d27ed90073c6190
 size 1465

graphcodebert-robust/checkpoint-800/tokenizer.json CHANGED Viewed

@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 512,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
   "padding": null,
   "added_tokens": [
     {

 {
   "version": "1.0",
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {

graphcodebert-robust/checkpoint-800/trainer_state.json CHANGED Viewed

@@ -151,429 +151,429 @@
     },
     {
       "epoch": 0.01344,
-      "grad_norm": 2.1326472759246826,
-      "learning_rate": 2.6752000000000006e-07,
-      "loss": 0.7305,
       "step": 210
     },
     {
       "epoch": 0.01408,
-      "grad_norm": 2.21703839302063,
-      "learning_rate": 2.8032e-07,
-      "loss": 0.7167,
       "step": 220
     },
     {
       "epoch": 0.01472,
-      "grad_norm": 1.6385700702667236,
-      "learning_rate": 2.9312e-07,
-      "loss": 0.7209,
       "step": 230
     },
     {
       "epoch": 0.01536,
-      "grad_norm": 1.4293471574783325,
-      "learning_rate": 3.0592000000000003e-07,
-      "loss": 0.722,
       "step": 240
     },
     {
       "epoch": 0.016,
-      "grad_norm": 2.1437904834747314,
-      "learning_rate": 3.1872e-07,
-      "loss": 0.717,
       "step": 250
     },
     {
       "epoch": 0.01664,
-      "grad_norm": 2.014806032180786,
-      "learning_rate": 3.3152000000000005e-07,
-      "loss": 0.7182,
       "step": 260
     },
     {
       "epoch": 0.01728,
-      "grad_norm": 1.7216386795043945,
-      "learning_rate": 3.4432e-07,
-      "loss": 0.7253,
       "step": 270
     },
     {
       "epoch": 0.01792,
-      "grad_norm": 1.4267009496688843,
-      "learning_rate": 3.5712e-07,
-      "loss": 0.7189,
       "step": 280
     },
     {
       "epoch": 0.01856,
-      "grad_norm": 2.222503185272217,
-      "learning_rate": 3.6992e-07,
-      "loss": 0.7198,
       "step": 290
     },
     {
       "epoch": 0.0192,
-      "grad_norm": 1.578922986984253,
-      "learning_rate": 3.8272000000000003e-07,
-      "loss": 0.717,
       "step": 300
     },
     {
       "epoch": 0.01984,
-      "grad_norm": 1.719905972480774,
-      "learning_rate": 3.9552e-07,
-      "loss": 0.709,
       "step": 310
     },
     {
       "epoch": 0.02048,
-      "grad_norm": 1.4473963975906372,
-      "learning_rate": 4.0832000000000005e-07,
-      "loss": 0.7215,
       "step": 320
     },
     {
       "epoch": 0.02112,
-      "grad_norm": 2.1639790534973145,
-      "learning_rate": 4.2112e-07,
-      "loss": 0.7175,
       "step": 330
     },
     {
       "epoch": 0.02176,
-      "grad_norm": 1.2387958765029907,
-      "learning_rate": 4.3392e-07,
-      "loss": 0.7129,
       "step": 340
     },
     {
       "epoch": 0.0224,
-      "grad_norm": 2.2797842025756836,
-      "learning_rate": 4.4672000000000007e-07,
-      "loss": 0.7159,
       "step": 350
     },
     {
       "epoch": 0.02304,
-      "grad_norm": 1.5692473649978638,
-      "learning_rate": 4.5952000000000003e-07,
-      "loss": 0.7161,
       "step": 360
     },
     {
       "epoch": 0.02368,
-      "grad_norm": 1.4270817041397095,
-      "learning_rate": 4.723200000000001e-07,
-      "loss": 0.7114,
       "step": 370
     },
     {
       "epoch": 0.02432,
-      "grad_norm": 1.4091335535049438,
-      "learning_rate": 4.8512e-07,
-      "loss": 0.7127,
       "step": 380
     },
     {
       "epoch": 0.02496,
-      "grad_norm": 1.8862844705581665,
-      "learning_rate": 4.979200000000001e-07,
-      "loss": 0.7153,
       "step": 390
     },
     {
       "epoch": 0.0256,
-      "grad_norm": 1.9264376163482666,
-      "learning_rate": 5.107200000000001e-07,
-      "loss": 0.7109,
       "step": 400
     },
     {
       "epoch": 0.02624,
-      "grad_norm": 1.4058727025985718,
-      "learning_rate": 5.235200000000001e-07,
-      "loss": 0.705,
       "step": 410
     },
     {
       "epoch": 0.02688,
-      "grad_norm": 1.519445776939392,
-      "learning_rate": 5.363200000000001e-07,
-      "loss": 0.7131,
       "step": 420
     },
     {
       "epoch": 0.02752,
-      "grad_norm": 1.6636698246002197,
-      "learning_rate": 5.491200000000001e-07,
-      "loss": 0.6916,
       "step": 430
     },
     {
       "epoch": 0.02816,
-      "grad_norm": 1.5472590923309326,
-      "learning_rate": 5.6192e-07,
-      "loss": 0.705,
       "step": 440
     },
     {
       "epoch": 0.0288,
-      "grad_norm": 1.4896206855773926,
-      "learning_rate": 5.747200000000001e-07,
-      "loss": 0.7046,
       "step": 450
     },
     {
       "epoch": 0.02944,
-      "grad_norm": 2.2565503120422363,
-      "learning_rate": 5.8752e-07,
-      "loss": 0.7009,
       "step": 460
     },
     {
       "epoch": 0.03008,
-      "grad_norm": 2.017638683319092,
-      "learning_rate": 6.0032e-07,
-      "loss": 0.7058,
       "step": 470
     },
     {
       "epoch": 0.03072,
-      "grad_norm": 1.3399696350097656,
-      "learning_rate": 6.1312e-07,
-      "loss": 0.7003,
       "step": 480
     },
     {
       "epoch": 0.03136,
-      "grad_norm": 1.3090866804122925,
-      "learning_rate": 6.2592e-07,
-      "loss": 0.7067,
       "step": 490
     },
     {
       "epoch": 0.032,
-      "grad_norm": 1.4199142456054688,
-      "learning_rate": 6.3872e-07,
-      "loss": 0.7008,
       "step": 500
     },
     {
       "epoch": 0.03264,
-      "grad_norm": 1.7174904346466064,
-      "learning_rate": 6.515200000000001e-07,
-      "loss": 0.7003,
       "step": 510
     },
     {
       "epoch": 0.03328,
-      "grad_norm": 1.2983943223953247,
-      "learning_rate": 6.643200000000001e-07,
-      "loss": 0.698,
       "step": 520
     },
     {
       "epoch": 0.03392,
-      "grad_norm": 1.8224154710769653,
-      "learning_rate": 6.7712e-07,
-      "loss": 0.7047,
       "step": 530
     },
     {
       "epoch": 0.03456,
-      "grad_norm": 1.3605278730392456,
-      "learning_rate": 6.899200000000001e-07,
-      "loss": 0.6974,
       "step": 540
     },
     {
       "epoch": 0.0352,
-      "grad_norm": 1.4932376146316528,
-      "learning_rate": 7.027200000000001e-07,
-      "loss": 0.6918,
       "step": 550
     },
     {
       "epoch": 0.03584,
-      "grad_norm": 1.2169368267059326,
-      "learning_rate": 7.155200000000001e-07,
-      "loss": 0.6996,
       "step": 560
     },
     {
       "epoch": 0.03648,
-      "grad_norm": 1.5690464973449707,
-      "learning_rate": 7.2832e-07,
-      "loss": 0.6942,
       "step": 570
     },
     {
       "epoch": 0.03712,
-      "grad_norm": 1.541991949081421,
-      "learning_rate": 7.4112e-07,
-      "loss": 0.6973,
       "step": 580
     },
     {
       "epoch": 0.03776,
-      "grad_norm": 1.7749661207199097,
-      "learning_rate": 7.5392e-07,
-      "loss": 0.6865,
       "step": 590
     },
     {
       "epoch": 0.0384,
-      "grad_norm": 1.2169281244277954,
-      "learning_rate": 7.667200000000001e-07,
-      "loss": 0.6876,
       "step": 600
     },
     {
       "epoch": 0.03904,
-      "grad_norm": 1.992397427558899,
-      "learning_rate": 7.7952e-07,
-      "loss": 0.6882,
       "step": 610
     },
     {
       "epoch": 0.03968,
-      "grad_norm": 1.5149959325790405,
-      "learning_rate": 7.9232e-07,
-      "loss": 0.6838,
       "step": 620
     },
     {
       "epoch": 0.04032,
-      "grad_norm": 1.707815170288086,
-      "learning_rate": 8.0512e-07,
-      "loss": 0.6799,
       "step": 630
     },
     {
       "epoch": 0.04096,
-      "grad_norm": 1.2718374729156494,
-      "learning_rate": 8.179200000000001e-07,
-      "loss": 0.6807,
       "step": 640
     },
     {
       "epoch": 0.0416,
-      "grad_norm": 1.4209625720977783,
-      "learning_rate": 8.3072e-07,
-      "loss": 0.6892,
       "step": 650
     },
     {
       "epoch": 0.04224,
-      "grad_norm": 1.295621633529663,
-      "learning_rate": 8.435200000000001e-07,
-      "loss": 0.6827,
       "step": 660
     },
     {
       "epoch": 0.04288,
-      "grad_norm": 1.1997125148773193,
-      "learning_rate": 8.563200000000001e-07,
-      "loss": 0.6815,
       "step": 670
     },
     {
       "epoch": 0.04352,
-      "grad_norm": 1.377269983291626,
-      "learning_rate": 8.691200000000001e-07,
-      "loss": 0.6848,
       "step": 680
     },
     {
       "epoch": 0.04416,
-      "grad_norm": 1.3606722354888916,
-      "learning_rate": 8.819200000000001e-07,
-      "loss": 0.6812,
       "step": 690
     },
     {
       "epoch": 0.0448,
-      "grad_norm": 1.416563630104065,
-      "learning_rate": 8.9472e-07,
-      "loss": 0.6844,
       "step": 700
     },
     {
       "epoch": 0.04544,
-      "grad_norm": 1.6803165674209595,
-      "learning_rate": 9.0752e-07,
-      "loss": 0.6782,
       "step": 710
     },
     {
       "epoch": 0.04608,
-      "grad_norm": 1.4515128135681152,
-      "learning_rate": 9.203200000000001e-07,
-      "loss": 0.671,
       "step": 720
     },
     {
       "epoch": 0.04672,
-      "grad_norm": 1.3443140983581543,
-      "learning_rate": 9.331200000000001e-07,
-      "loss": 0.6754,
       "step": 730
     },
     {
       "epoch": 0.04736,
-      "grad_norm": 1.8091343641281128,
-      "learning_rate": 9.4592e-07,
-      "loss": 0.6651,
       "step": 740
     },
     {
       "epoch": 0.048,
-      "grad_norm": 1.9745153188705444,
-      "learning_rate": 9.587200000000002e-07,
-      "loss": 0.6724,
       "step": 750
     },
     {
       "epoch": 0.04864,
-      "grad_norm": 1.2002370357513428,
-      "learning_rate": 9.7152e-07,
-      "loss": 0.6734,
       "step": 760
     },
     {
       "epoch": 0.04928,
-      "grad_norm": 1.4024569988250732,
-      "learning_rate": 9.843200000000002e-07,
-      "loss": 0.6735,
       "step": 770
     },
     {
       "epoch": 0.04992,
-      "grad_norm": 1.3709276914596558,
-      "learning_rate": 9.9712e-07,
-      "loss": 0.6643,
       "step": 780
     },
     {
       "epoch": 0.05056,
-      "grad_norm": 2.260002851486206,
-      "learning_rate": 1.00992e-06,
-      "loss": 0.6634,
       "step": 790
     },
     {
       "epoch": 0.0512,
-      "grad_norm": 1.0970172882080078,
-      "learning_rate": 1.02272e-06,
-      "loss": 0.6721,
       "step": 800
     }
   ],
   "logging_steps": 10,
-  "max_steps": 156250,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 10,
   "save_steps": 200,
   "stateful_callbacks": {
     "EarlyStoppingCallback": {
@@ -596,7 +596,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6735643017216000.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

     },
     {
       "epoch": 0.01344,
+      "grad_norm": 141396.296875,
+      "learning_rate": 5.350742447516642e-07,
+      "loss": 0.7217,
       "step": 210
     },
     {
       "epoch": 0.01408,
+      "grad_norm": 102339.1640625,
+      "learning_rate": 5.606758832565284e-07,
+      "loss": 0.7215,
       "step": 220
     },
     {
       "epoch": 0.01472,
+      "grad_norm": 134052.9375,
+      "learning_rate": 5.862775217613928e-07,
+      "loss": 0.7115,
       "step": 230
     },
     {
       "epoch": 0.01536,
+      "grad_norm": 87181.984375,
+      "learning_rate": 6.118791602662571e-07,
+      "loss": 0.7241,
       "step": 240
     },
     {
       "epoch": 0.016,
+      "grad_norm": 100231.328125,
+      "learning_rate": 6.374807987711214e-07,
+      "loss": 0.71,
       "step": 250
     },
     {
       "epoch": 0.01664,
+      "grad_norm": 136721.484375,
+      "learning_rate": 6.630824372759858e-07,
+      "loss": 0.7188,
       "step": 260
     },
     {
       "epoch": 0.01728,
+      "grad_norm": 115868.8125,
+      "learning_rate": 6.8868407578085e-07,
+      "loss": 0.7199,
       "step": 270
     },
     {
       "epoch": 0.01792,
+      "grad_norm": 70205.1484375,
+      "learning_rate": 7.142857142857143e-07,
+      "loss": 0.7299,
       "step": 280
     },
     {
       "epoch": 0.01856,
+      "grad_norm": 98926.4453125,
+      "learning_rate": 7.398873527905787e-07,
+      "loss": 0.7159,
       "step": 290
     },
     {
       "epoch": 0.0192,
+      "grad_norm": 134108.140625,
+      "learning_rate": 7.65488991295443e-07,
+      "loss": 0.7122,
       "step": 300
     },
     {
       "epoch": 0.01984,
+      "grad_norm": 103719.140625,
+      "learning_rate": 7.910906298003073e-07,
+      "loss": 0.7185,
       "step": 310
     },
     {
       "epoch": 0.02048,
+      "grad_norm": 85624.953125,
+      "learning_rate": 8.166922683051716e-07,
+      "loss": 0.718,
       "step": 320
     },
     {
       "epoch": 0.02112,
+      "grad_norm": 138824.15625,
+      "learning_rate": 8.422939068100359e-07,
+      "loss": 0.713,
       "step": 330
     },
     {
       "epoch": 0.02176,
+      "grad_norm": 73629.0859375,
+      "learning_rate": 8.678955453149002e-07,
+      "loss": 0.7186,
       "step": 340
     },
     {
       "epoch": 0.0224,
+      "grad_norm": 132493.0,
+      "learning_rate": 8.934971838197646e-07,
+      "loss": 0.7133,
       "step": 350
     },
     {
       "epoch": 0.02304,
+      "grad_norm": 85223.625,
+      "learning_rate": 9.190988223246289e-07,
+      "loss": 0.7124,
       "step": 360
     },
     {
       "epoch": 0.02368,
+      "grad_norm": 77868.78125,
+      "learning_rate": 9.447004608294931e-07,
+      "loss": 0.7058,
       "step": 370
     },
     {
       "epoch": 0.02432,
+      "grad_norm": 75874.3046875,
+      "learning_rate": 9.703020993343575e-07,
+      "loss": 0.7139,
       "step": 380
     },
     {
       "epoch": 0.02496,
+      "grad_norm": 151937.703125,
+      "learning_rate": 9.959037378392218e-07,
+      "loss": 0.713,
       "step": 390
     },
     {
       "epoch": 0.0256,
+      "grad_norm": 161711.671875,
+      "learning_rate": 1.021505376344086e-06,
+      "loss": 0.7137,
       "step": 400
     },
     {
       "epoch": 0.02624,
+      "grad_norm": 90800.234375,
+      "learning_rate": 1.0471070148489503e-06,
+      "loss": 0.7091,
       "step": 410
     },
     {
       "epoch": 0.02688,
+      "grad_norm": 82131.34375,
+      "learning_rate": 1.0727086533538148e-06,
+      "loss": 0.7098,
       "step": 420
     },
     {
       "epoch": 0.02752,
+      "grad_norm": 92818.9140625,
+      "learning_rate": 1.0983102918586791e-06,
+      "loss": 0.7099,
       "step": 430
     },
     {
       "epoch": 0.02816,
+      "grad_norm": 88555.5078125,
+      "learning_rate": 1.1239119303635434e-06,
+      "loss": 0.7086,
       "step": 440
     },
     {
       "epoch": 0.0288,
+      "grad_norm": 73428.6015625,
+      "learning_rate": 1.1495135688684077e-06,
+      "loss": 0.7117,
       "step": 450
     },
     {
       "epoch": 0.02944,
+      "grad_norm": 128938.7421875,
+      "learning_rate": 1.175115207373272e-06,
+      "loss": 0.7182,
       "step": 460
     },
     {
       "epoch": 0.03008,
+      "grad_norm": 102742.3359375,
+      "learning_rate": 1.2007168458781362e-06,
+      "loss": 0.7108,
       "step": 470
     },
     {
       "epoch": 0.03072,
+      "grad_norm": 73825.8125,
+      "learning_rate": 1.2263184843830007e-06,
+      "loss": 0.7087,
       "step": 480
     },
     {
       "epoch": 0.03136,
+      "grad_norm": 110930.75,
+      "learning_rate": 1.251920122887865e-06,
+      "loss": 0.7232,
       "step": 490
     },
     {
       "epoch": 0.032,
+      "grad_norm": 95068.84375,
+      "learning_rate": 1.2775217613927293e-06,
+      "loss": 0.703,
       "step": 500
     },
     {
       "epoch": 0.03264,
+      "grad_norm": 118731.9296875,
+      "learning_rate": 1.3031233998975938e-06,
+      "loss": 0.7063,
       "step": 510
     },
     {
       "epoch": 0.03328,
+      "grad_norm": 80511.828125,
+      "learning_rate": 1.3287250384024578e-06,
+      "loss": 0.7143,
       "step": 520
     },
     {
       "epoch": 0.03392,
+      "grad_norm": 84864.484375,
+      "learning_rate": 1.354326676907322e-06,
+      "loss": 0.7055,
       "step": 530
     },
     {
       "epoch": 0.03456,
+      "grad_norm": 107800.109375,
+      "learning_rate": 1.3799283154121864e-06,
+      "loss": 0.7119,
       "step": 540
     },
     {
       "epoch": 0.0352,
+      "grad_norm": 83667.671875,
+      "learning_rate": 1.4055299539170509e-06,
+      "loss": 0.7082,
       "step": 550
     },
     {
       "epoch": 0.03584,
+      "grad_norm": 75656.4140625,
+      "learning_rate": 1.4311315924219151e-06,
+      "loss": 0.7062,
       "step": 560
     },
     {
       "epoch": 0.03648,
+      "grad_norm": 79985.875,
+      "learning_rate": 1.4567332309267796e-06,
+      "loss": 0.7155,
       "step": 570
     },
     {
       "epoch": 0.03712,
+      "grad_norm": 76334.078125,
+      "learning_rate": 1.4823348694316437e-06,
+      "loss": 0.7075,
       "step": 580
     },
     {
       "epoch": 0.03776,
+      "grad_norm": 140764.03125,
+      "learning_rate": 1.507936507936508e-06,
+      "loss": 0.7065,
       "step": 590
     },
     {
       "epoch": 0.0384,
+      "grad_norm": 100877.296875,
+      "learning_rate": 1.5335381464413722e-06,
+      "loss": 0.7096,
       "step": 600
     },
     {
       "epoch": 0.03904,
+      "grad_norm": 104088.1171875,
+      "learning_rate": 1.5591397849462367e-06,
+      "loss": 0.6987,
       "step": 610
     },
     {
       "epoch": 0.03968,
+      "grad_norm": 80806.2265625,
+      "learning_rate": 1.584741423451101e-06,
+      "loss": 0.707,
       "step": 620
     },
     {
       "epoch": 0.04032,
+      "grad_norm": 109884.765625,
+      "learning_rate": 1.6103430619559655e-06,
+      "loss": 0.6991,
       "step": 630
     },
     {
       "epoch": 0.04096,
+      "grad_norm": 79944.890625,
+      "learning_rate": 1.6359447004608298e-06,
+      "loss": 0.7047,
       "step": 640
     },
     {
       "epoch": 0.0416,
+      "grad_norm": 93673.3828125,
+      "learning_rate": 1.6615463389656938e-06,
+      "loss": 0.6971,
       "step": 650
     },
     {
       "epoch": 0.04224,
+      "grad_norm": 76641.265625,
+      "learning_rate": 1.6871479774705581e-06,
+      "loss": 0.6957,
       "step": 660
     },
     {
       "epoch": 0.04288,
+      "grad_norm": 73583.5546875,
+      "learning_rate": 1.7127496159754226e-06,
+      "loss": 0.7028,
       "step": 670
     },
     {
       "epoch": 0.04352,
+      "grad_norm": 75177.9609375,
+      "learning_rate": 1.7383512544802869e-06,
+      "loss": 0.7012,
       "step": 680
     },
     {
       "epoch": 0.04416,
+      "grad_norm": 78340.8515625,
+      "learning_rate": 1.7639528929851512e-06,
+      "loss": 0.6987,
       "step": 690
     },
     {
       "epoch": 0.0448,
+      "grad_norm": 86004.1171875,
+      "learning_rate": 1.7895545314900157e-06,
+      "loss": 0.7061,
       "step": 700
     },
     {
       "epoch": 0.04544,
+      "grad_norm": 94212.0390625,
+      "learning_rate": 1.8151561699948797e-06,
+      "loss": 0.6993,
       "step": 710
     },
     {
       "epoch": 0.04608,
+      "grad_norm": 83918.2421875,
+      "learning_rate": 1.840757808499744e-06,
+      "loss": 0.7009,
       "step": 720
     },
     {
       "epoch": 0.04672,
+      "grad_norm": 68374.3125,
+      "learning_rate": 1.8663594470046085e-06,
+      "loss": 0.6964,
       "step": 730
     },
     {
       "epoch": 0.04736,
+      "grad_norm": 90348.78125,
+      "learning_rate": 1.8919610855094728e-06,
+      "loss": 0.7011,
       "step": 740
     },
     {
       "epoch": 0.048,
+      "grad_norm": 146658.0,
+      "learning_rate": 1.9175627240143373e-06,
+      "loss": 0.7003,
       "step": 750
     },
     {
       "epoch": 0.04864,
+      "grad_norm": 112037.1640625,
+      "learning_rate": 1.9431643625192015e-06,
+      "loss": 0.7051,
       "step": 760
     },
     {
       "epoch": 0.04928,
+      "grad_norm": 70628.625,
+      "learning_rate": 1.9687660010240654e-06,
+      "loss": 0.6923,
       "step": 770
     },
     {
       "epoch": 0.04992,
+      "grad_norm": 109922.125,
+      "learning_rate": 1.99436763952893e-06,
+      "loss": 0.6893,
       "step": 780
     },
     {
       "epoch": 0.05056,
+      "grad_norm": 135306.375,
+      "learning_rate": 2.0199692780337944e-06,
+      "loss": 0.7008,
       "step": 790
     },
     {
       "epoch": 0.0512,
+      "grad_norm": 82354.8046875,
+      "learning_rate": 2.0455709165386586e-06,
+      "loss": 0.705,
       "step": 800
     }
   ],
   "logging_steps": 10,
+  "max_steps": 78125,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
   "save_steps": 200,
   "stateful_callbacks": {
     "EarlyStoppingCallback": {
       "attributes": {}
     }
   },
+  "total_flos": 6733094128867200.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

graphcodebert-robust/checkpoint-800/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9ff659b85d84ec0bae53596bc271ba773db9c463626db0f13fd8e747f433dad4
 size 5841

 version https://git-lfs.github.com/spec/v1
+oid sha256:b5fc4023adc281644437a690ea6a6001846d7762699cd428d4ad38e1888076db
 size 5841

graphcodebert-robust/training.log CHANGED Viewed

@@ -1,10 +1,10 @@
-2026-04-15 17:47:48,928 - INFO - train_pipeline - Logging to ./output_checkpoints/graphcodebert-robust/training.log
-2026-04-15 17:47:48,933 - INFO - train_pipeline - Training config: TrainConfig(model_name='microsoft/graphcodebert-base', output_dir='./output_checkpoints/graphcodebert-robust', num_epochs=10, batch_size=32, learning_rate=2e-05, max_length=512, num_labels=2, use_wandb=True, freeze_base=True, loss_type='r-drop', focal_alpha=1.0, focal_gamma=2.0, r_drop_alpha=4.0, infonce_temperature=0.07, infonce_weight=0.5, seed=42, resume_from_checkpoint=None, label_smoothing=0.1, adversarial_epsilon=0.5, use_swa=False, swa_start_epoch=2, swa_lr=1e-05, data_augmentation=True, aug_rename_prob=0.3, aug_format_prob=0.3, device=device(type='cuda'))
-2026-04-15 17:47:48,936 - INFO - train_pipeline - Loading model & tokenizer for 'microsoft/graphcodebert-base'
-2026-04-15 17:47:51,171 - INFO - train_pipeline - Model placed on cuda
-2026-04-15 17:47:51,174 - INFO - train_pipeline - Base model weights frozen – only classifier head will be trained.
-2026-04-15 17:47:51,177 - INFO - train_pipeline - ===== Model Architecture =====
-2026-04-15 17:47:51,180 - INFO - train_pipeline -
 RobertaForSequenceClassification(
   (roberta): RobertaModel(
     (embeddings): RobertaEmbeddings(
@@ -49,12 +49,12 @@ RobertaForSequenceClassification(
     (out_proj): Linear(in_features=768, out_features=2, bias=True)
   )
 )
-2026-04-15 17:47:51,186 - INFO - train_pipeline - ===== Parameter Summary =====
-2026-04-15 17:47:51,189 - INFO - train_pipeline - Total Parameters:         124,647,170
-2026-04-15 17:47:51,191 - INFO - train_pipeline - Trainable Parameters:     592,130
-2026-04-15 17:47:51,193 - INFO - train_pipeline - Non-trainable Parameters: 124,055,040
-2026-04-15 17:47:51,195 - INFO - train_pipeline - ===== Tokenizer Summary =====
-2026-04-15 17:47:51,224 - INFO - train_pipeline - Vocab size: 50265 | Special tokens: ['<s>', '</s>', '<unk>', '<pad>', '<mask>']
-2026-04-15 17:47:51,227 - INFO - train_pipeline - ===== End of Architecture Log =====
-2026-04-15 17:47:54,338 - INFO - train_pipeline - Data augmentation enabled (rename=0.3, format=0.3)
-2026-04-15 17:47:54,374 - INFO - train_pipeline - === Starting training with robust regularisation ===

+2026-04-16 09:18:35,093 - INFO - train_pipeline - Logging to ./output_checkpoints/graphcodebert-robust/training.log
+2026-04-16 09:18:35,094 - INFO - train_pipeline - Training config: TrainConfig(model_name='microsoft/graphcodebert-base', output_dir='./output_checkpoints/graphcodebert-robust', num_epochs=5, batch_size=32, learning_rate=2e-05, max_length=512, num_labels=2, use_wandb=True, freeze_base=True, loss_type='r-drop', focal_alpha=1.0, focal_gamma=2.0, r_drop_alpha=4.0, infonce_temperature=0.07, infonce_weight=0.5, seed=42, resume_from_checkpoint='checkpoints/graphcodebert-robust/checkpoint-200', label_smoothing=0.1, adversarial_epsilon=0.5, use_swa=True, swa_start_epoch=2, swa_lr=1e-05, data_augmentation=True, aug_rename_prob=0.3, aug_format_prob=0.3, device=device(type='cuda'))
+2026-04-16 09:18:35,094 - INFO - train_pipeline - Loading model & tokenizer for 'microsoft/graphcodebert-base'
+2026-04-16 09:18:43,368 - INFO - train_pipeline - Model placed on cuda
+2026-04-16 09:18:43,371 - INFO - train_pipeline - Base model weights frozen – only classifier head will be trained.
+2026-04-16 09:18:43,372 - INFO - train_pipeline - ===== Model Architecture =====
+2026-04-16 09:18:43,375 - INFO - train_pipeline -
 RobertaForSequenceClassification(
   (roberta): RobertaModel(
     (embeddings): RobertaEmbeddings(
     (out_proj): Linear(in_features=768, out_features=2, bias=True)
   )
 )
+2026-04-16 09:18:43,377 - INFO - train_pipeline - ===== Parameter Summary =====
+2026-04-16 09:18:43,378 - INFO - train_pipeline - Total Parameters:         124,647,170
+2026-04-16 09:18:43,380 - INFO - train_pipeline - Trainable Parameters:     592,130
+2026-04-16 09:18:43,381 - INFO - train_pipeline - Non-trainable Parameters: 124,055,040
+2026-04-16 09:18:43,381 - INFO - train_pipeline - ===== Tokenizer Summary =====
+2026-04-16 09:18:43,409 - INFO - train_pipeline - Vocab size: 50265 | Special tokens: ['<s>', '</s>', '<unk>', '<pad>', '<mask>']
+2026-04-16 09:18:43,410 - INFO - train_pipeline - ===== End of Architecture Log =====
+2026-04-16 09:18:43,411 - INFO - train_pipeline - Data augmentation enabled (rename=0.3, format=0.3)
+2026-04-16 09:22:04,475 - INFO - train_pipeline - === Starting training with robust regularisation ===