Upload folder using huggingface_hub

Browse files

Files changed (10) hide show

config.json +30 -0
model.safetensors +3 -0
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
tokenizer.json +0 -0
tokenizer_config.json +16 -0
trainer_state.json +610 -0
training_args.bin +3 -0
training_metadata.json +10 -0

config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "add_cross_attention": false,
+  "architectures": [
+    "RobertaForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "dtype": "float32",
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "is_decoder": false,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 1,
+  "problem_type": "single_label_classification",
+  "tie_word_embeddings": true,
+  "transformers_version": "5.0.0",
+  "type_vocab_size": 1,
+  "use_cache": false,
+  "vocab_size": 50265
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:209e18b1560b4629e233cb8f109fc79fe8783636765d1587b501d3d9d6d04632
+size 498612800

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8875c9eda4a9444a1d3d0f08f68b7a72f17ba43003aa9550550c60e286f2078f
+size 997348747

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e35fdb8910d426c9c16878ea3078dc0568d86e073d0a7f891b2f09ffe5b5b22e
+size 14645

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c75de975c68bbeb8ce37282f26021c4e18754c36408856eaa382dc6627e5aaf3
+size 1465

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "is_local": true,
+  "mask_token": "<mask>",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "RobertaTokenizer",
+  "trim_offsets": true,
+  "unk_token": "<unk>"
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,610 @@

+{
+  "best_global_step": 4000,
+  "best_metric": 0.6931638121604919,
+  "best_model_checkpoint": "/content/drive/MyDrive/PolyGuard/model_final/checkpoint-4000",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 8000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.025,
+      "grad_norm": 4.840836524963379,
+      "learning_rate": 2.4750000000000002e-05,
+      "loss": 0.6476753997802734,
+      "step": 100
+    },
+    {
+      "epoch": 0.05,
+      "grad_norm": 3.3573901653289795,
+      "learning_rate": 4.975e-05,
+      "loss": 0.6003963470458984,
+      "step": 200
+    },
+    {
+      "epoch": 0.075,
+      "grad_norm": 5.3957953453063965,
+      "learning_rate": 4.96867088607595e-05,
+      "loss": 0.5752050399780273,
+      "step": 300
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 6.348972797393799,
+      "learning_rate": 4.937025316455696e-05,
+      "loss": 0.5668778610229492,
+      "step": 400
+    },
+    {
+      "epoch": 0.125,
+      "grad_norm": 3.777247428894043,
+      "learning_rate": 4.905379746835443e-05,
+      "loss": 0.6448370361328125,
+      "step": 500
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 5.974273204803467,
+      "learning_rate": 4.87373417721519e-05,
+      "loss": 0.5914559173583984,
+      "step": 600
+    },
+    {
+      "epoch": 0.175,
+      "grad_norm": 5.3988165855407715,
+      "learning_rate": 4.842088607594937e-05,
+      "loss": 0.6363700103759765,
+      "step": 700
+    },
+    {
+      "epoch": 0.2,
+      "grad_norm": 2.299058675765991,
+      "learning_rate": 4.810443037974684e-05,
+      "loss": 0.5667419815063477,
+      "step": 800
+    },
+    {
+      "epoch": 0.225,
+      "grad_norm": 3.1783978939056396,
+      "learning_rate": 4.7787974683544305e-05,
+      "loss": 0.5284980392456055,
+      "step": 900
+    },
+    {
+      "epoch": 0.25,
+      "grad_norm": 3.9692907333374023,
+      "learning_rate": 4.747151898734177e-05,
+      "loss": 0.56060546875,
+      "step": 1000
+    },
+    {
+      "epoch": 0.275,
+      "grad_norm": 2.46657657623291,
+      "learning_rate": 4.715506329113925e-05,
+      "loss": 0.5778974151611328,
+      "step": 1100
+    },
+    {
+      "epoch": 0.3,
+      "grad_norm": 1.4161385297775269,
+      "learning_rate": 4.683860759493671e-05,
+      "loss": 0.5550444412231446,
+      "step": 1200
+    },
+    {
+      "epoch": 0.325,
+      "grad_norm": 6.127955436706543,
+      "learning_rate": 4.652215189873418e-05,
+      "loss": 0.638050308227539,
+      "step": 1300
+    },
+    {
+      "epoch": 0.35,
+      "grad_norm": 2.9411966800689697,
+      "learning_rate": 4.620569620253164e-05,
+      "loss": 0.5464860153198242,
+      "step": 1400
+    },
+    {
+      "epoch": 0.375,
+      "grad_norm": 5.668337345123291,
+      "learning_rate": 4.588924050632912e-05,
+      "loss": 0.6324460220336914,
+      "step": 1500
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 5.304285049438477,
+      "learning_rate": 4.5572784810126585e-05,
+      "loss": 0.5795536041259766,
+      "step": 1600
+    },
+    {
+      "epoch": 0.425,
+      "grad_norm": 1.6371102333068848,
+      "learning_rate": 4.525632911392405e-05,
+      "loss": 0.5774750137329101,
+      "step": 1700
+    },
+    {
+      "epoch": 0.45,
+      "grad_norm": 1.4340555667877197,
+      "learning_rate": 4.493987341772152e-05,
+      "loss": 0.6004017257690429,
+      "step": 1800
+    },
+    {
+      "epoch": 0.475,
+      "grad_norm": 2.997642755508423,
+      "learning_rate": 4.462341772151899e-05,
+      "loss": 0.6037093734741211,
+      "step": 1900
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 1.124809741973877,
+      "learning_rate": 4.430696202531646e-05,
+      "loss": 0.5478248596191406,
+      "step": 2000
+    },
+    {
+      "epoch": 0.525,
+      "grad_norm": 1.4012881517410278,
+      "learning_rate": 4.399050632911393e-05,
+      "loss": 0.5746703720092774,
+      "step": 2100
+    },
+    {
+      "epoch": 0.55,
+      "grad_norm": 2.774062395095825,
+      "learning_rate": 4.367405063291139e-05,
+      "loss": 0.5518299865722657,
+      "step": 2200
+    },
+    {
+      "epoch": 0.575,
+      "grad_norm": 0.9460004568099976,
+      "learning_rate": 4.3357594936708864e-05,
+      "loss": 0.5653076553344727,
+      "step": 2300
+    },
+    {
+      "epoch": 0.6,
+      "grad_norm": 4.564599514007568,
+      "learning_rate": 4.304113924050633e-05,
+      "loss": 0.626568832397461,
+      "step": 2400
+    },
+    {
+      "epoch": 0.625,
+      "grad_norm": 0.9103949666023254,
+      "learning_rate": 4.27246835443038e-05,
+      "loss": 0.5939551544189453,
+      "step": 2500
+    },
+    {
+      "epoch": 0.65,
+      "grad_norm": 3.449150800704956,
+      "learning_rate": 4.2408227848101265e-05,
+      "loss": 0.5971554946899414,
+      "step": 2600
+    },
+    {
+      "epoch": 0.675,
+      "grad_norm": 3.5549769401550293,
+      "learning_rate": 4.2091772151898736e-05,
+      "loss": 0.5751391983032227,
+      "step": 2700
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 1.6542292833328247,
+      "learning_rate": 4.177531645569621e-05,
+      "loss": 0.5986330032348632,
+      "step": 2800
+    },
+    {
+      "epoch": 0.725,
+      "grad_norm": 1.3074430227279663,
+      "learning_rate": 4.145886075949367e-05,
+      "loss": 0.5717515182495118,
+      "step": 2900
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 3.095973014831543,
+      "learning_rate": 4.114240506329114e-05,
+      "loss": 0.5813043594360352,
+      "step": 3000
+    },
+    {
+      "epoch": 0.775,
+      "grad_norm": 2.611328363418579,
+      "learning_rate": 4.0825949367088615e-05,
+      "loss": 0.6051469039916992,
+      "step": 3100
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 2.4239795207977295,
+      "learning_rate": 4.050949367088608e-05,
+      "loss": 0.5933720779418945,
+      "step": 3200
+    },
+    {
+      "epoch": 0.825,
+      "grad_norm": 1.3653069734573364,
+      "learning_rate": 4.0193037974683544e-05,
+      "loss": 0.5751293182373047,
+      "step": 3300
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.4362722635269165,
+      "learning_rate": 3.9876582278481015e-05,
+      "loss": 0.5673767852783204,
+      "step": 3400
+    },
+    {
+      "epoch": 0.875,
+      "grad_norm": 1.1265358924865723,
+      "learning_rate": 3.956012658227849e-05,
+      "loss": 0.6048561477661133,
+      "step": 3500
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 1.3712810277938843,
+      "learning_rate": 3.924367088607595e-05,
+      "loss": 0.6653845977783203,
+      "step": 3600
+    },
+    {
+      "epoch": 0.925,
+      "grad_norm": 1.213412880897522,
+      "learning_rate": 3.8927215189873416e-05,
+      "loss": 0.6943452453613281,
+      "step": 3700
+    },
+    {
+      "epoch": 0.95,
+      "grad_norm": 2.019483804702759,
+      "learning_rate": 3.861075949367089e-05,
+      "loss": 0.6974296569824219,
+      "step": 3800
+    },
+    {
+      "epoch": 0.975,
+      "grad_norm": 1.273471713066101,
+      "learning_rate": 3.829430379746836e-05,
+      "loss": 0.706220474243164,
+      "step": 3900
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 2.3656868934631348,
+      "learning_rate": 3.7977848101265823e-05,
+      "loss": 0.702726058959961,
+      "step": 4000
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.6931638121604919,
+      "eval_runtime": 166.9577,
+      "eval_samples_per_second": 47.916,
+      "eval_steps_per_second": 5.99,
+      "step": 4000
+    },
+    {
+      "epoch": 1.025,
+      "grad_norm": 1.4108144044876099,
+      "learning_rate": 3.7661392405063295e-05,
+      "loss": 0.6999430847167969,
+      "step": 4100
+    },
+    {
+      "epoch": 1.05,
+      "grad_norm": 2.259119987487793,
+      "learning_rate": 3.734493670886076e-05,
+      "loss": 0.7005876922607421,
+      "step": 4200
+    },
+    {
+      "epoch": 1.075,
+      "grad_norm": 0.8190616965293884,
+      "learning_rate": 3.702848101265823e-05,
+      "loss": 0.6983786010742188,
+      "step": 4300
+    },
+    {
+      "epoch": 1.1,
+      "grad_norm": 1.3949053287506104,
+      "learning_rate": 3.67120253164557e-05,
+      "loss": 0.6970309448242188,
+      "step": 4400
+    },
+    {
+      "epoch": 1.125,
+      "grad_norm": 1.9007196426391602,
+      "learning_rate": 3.639556962025317e-05,
+      "loss": 0.7024803161621094,
+      "step": 4500
+    },
+    {
+      "epoch": 1.15,
+      "grad_norm": 3.2963850498199463,
+      "learning_rate": 3.607911392405063e-05,
+      "loss": 0.6969153594970703,
+      "step": 4600
+    },
+    {
+      "epoch": 1.175,
+      "grad_norm": 2.8197951316833496,
+      "learning_rate": 3.57626582278481e-05,
+      "loss": 0.6913418579101562,
+      "step": 4700
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 3.7260422706604004,
+      "learning_rate": 3.5446202531645574e-05,
+      "loss": 0.6954251098632812,
+      "step": 4800
+    },
+    {
+      "epoch": 1.225,
+      "grad_norm": 2.538835048675537,
+      "learning_rate": 3.512974683544304e-05,
+      "loss": 0.7016423797607422,
+      "step": 4900
+    },
+    {
+      "epoch": 1.25,
+      "grad_norm": 1.9548262357711792,
+      "learning_rate": 3.48132911392405e-05,
+      "loss": 0.6976743316650391,
+      "step": 5000
+    },
+    {
+      "epoch": 1.275,
+      "grad_norm": 4.0937180519104,
+      "learning_rate": 3.4496835443037975e-05,
+      "loss": 0.700084228515625,
+      "step": 5100
+    },
+    {
+      "epoch": 1.3,
+      "grad_norm": 2.521569013595581,
+      "learning_rate": 3.4180379746835446e-05,
+      "loss": 0.6976382446289062,
+      "step": 5200
+    },
+    {
+      "epoch": 1.325,
+      "grad_norm": 4.047502517700195,
+      "learning_rate": 3.386392405063291e-05,
+      "loss": 0.6967656707763672,
+      "step": 5300
+    },
+    {
+      "epoch": 1.35,
+      "grad_norm": 3.4342639446258545,
+      "learning_rate": 3.354746835443038e-05,
+      "loss": 0.6955546569824219,
+      "step": 5400
+    },
+    {
+      "epoch": 1.375,
+      "grad_norm": 1.5393496751785278,
+      "learning_rate": 3.3231012658227854e-05,
+      "loss": 0.6965518951416015,
+      "step": 5500
+    },
+    {
+      "epoch": 1.4,
+      "grad_norm": 4.321380138397217,
+      "learning_rate": 3.291455696202532e-05,
+      "loss": 0.6958496856689453,
+      "step": 5600
+    },
+    {
+      "epoch": 1.425,
+      "grad_norm": 1.1191785335540771,
+      "learning_rate": 3.259810126582279e-05,
+      "loss": 0.6999098968505859,
+      "step": 5700
+    },
+    {
+      "epoch": 1.45,
+      "grad_norm": 0.8614036440849304,
+      "learning_rate": 3.2281645569620254e-05,
+      "loss": 0.6959712219238281,
+      "step": 5800
+    },
+    {
+      "epoch": 1.475,
+      "grad_norm": 0.9664958715438843,
+      "learning_rate": 3.1965189873417725e-05,
+      "loss": 0.6925695037841797,
+      "step": 5900
+    },
+    {
+      "epoch": 1.5,
+      "grad_norm": 2.1453211307525635,
+      "learning_rate": 3.164873417721519e-05,
+      "loss": 0.698175048828125,
+      "step": 6000
+    },
+    {
+      "epoch": 1.525,
+      "grad_norm": 2.1440930366516113,
+      "learning_rate": 3.133227848101266e-05,
+      "loss": 0.682925796508789,
+      "step": 6100
+    },
+    {
+      "epoch": 1.55,
+      "grad_norm": 0.8990124464035034,
+      "learning_rate": 3.1015822784810126e-05,
+      "loss": 0.6517456817626953,
+      "step": 6200
+    },
+    {
+      "epoch": 1.575,
+      "grad_norm": 1.3929342031478882,
+      "learning_rate": 3.06993670886076e-05,
+      "loss": 0.7006549835205078,
+      "step": 6300
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 2.1912875175476074,
+      "learning_rate": 3.0382911392405065e-05,
+      "loss": 0.6975661468505859,
+      "step": 6400
+    },
+    {
+      "epoch": 1.625,
+      "grad_norm": 1.1694247722625732,
+      "learning_rate": 3.0066455696202533e-05,
+      "loss": 0.6955360412597656,
+      "step": 6500
+    },
+    {
+      "epoch": 1.65,
+      "grad_norm": 3.340589761734009,
+      "learning_rate": 2.975e-05,
+      "loss": 0.7008393859863281,
+      "step": 6600
+    },
+    {
+      "epoch": 1.675,
+      "grad_norm": 2.42465877532959,
+      "learning_rate": 2.9433544303797473e-05,
+      "loss": 0.6998232269287109,
+      "step": 6700
+    },
+    {
+      "epoch": 1.7,
+      "grad_norm": 1.9139105081558228,
+      "learning_rate": 2.9117088607594937e-05,
+      "loss": 0.6989698028564453,
+      "step": 6800
+    },
+    {
+      "epoch": 1.725,
+      "grad_norm": 0.7264005541801453,
+      "learning_rate": 2.8800632911392405e-05,
+      "loss": 0.6944959259033203,
+      "step": 6900
+    },
+    {
+      "epoch": 1.75,
+      "grad_norm": 1.1322827339172363,
+      "learning_rate": 2.8484177215189873e-05,
+      "loss": 0.7098442840576172,
+      "step": 7000
+    },
+    {
+      "epoch": 1.775,
+      "grad_norm": 2.150141477584839,
+      "learning_rate": 2.8167721518987345e-05,
+      "loss": 0.6946941375732422,
+      "step": 7100
+    },
+    {
+      "epoch": 1.8,
+      "grad_norm": 1.850095510482788,
+      "learning_rate": 2.785126582278481e-05,
+      "loss": 0.6941130065917969,
+      "step": 7200
+    },
+    {
+      "epoch": 1.825,
+      "grad_norm": 1.994320273399353,
+      "learning_rate": 2.7534810126582277e-05,
+      "loss": 0.6933687591552734,
+      "step": 7300
+    },
+    {
+      "epoch": 1.85,
+      "grad_norm": 1.1272798776626587,
+      "learning_rate": 2.721835443037975e-05,
+      "loss": 0.7010916900634766,
+      "step": 7400
+    },
+    {
+      "epoch": 1.875,
+      "grad_norm": 2.2662463188171387,
+      "learning_rate": 2.6901898734177217e-05,
+      "loss": 0.6951226806640625,
+      "step": 7500
+    },
+    {
+      "epoch": 1.9,
+      "grad_norm": 2.8019468784332275,
+      "learning_rate": 2.6585443037974685e-05,
+      "loss": 0.6966998291015625,
+      "step": 7600
+    },
+    {
+      "epoch": 1.925,
+      "grad_norm": 2.3949637413024902,
+      "learning_rate": 2.6268987341772156e-05,
+      "loss": 0.6956380462646484,
+      "step": 7700
+    },
+    {
+      "epoch": 1.95,
+      "grad_norm": 2.5100715160369873,
+      "learning_rate": 2.595253164556962e-05,
+      "loss": 0.6968719482421875,
+      "step": 7800
+    },
+    {
+      "epoch": 1.975,
+      "grad_norm": 6.460758209228516,
+      "learning_rate": 2.563607594936709e-05,
+      "loss": 0.692848892211914,
+      "step": 7900
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 3.2004637718200684,
+      "learning_rate": 2.5319620253164557e-05,
+      "loss": 0.6972612762451171,
+      "step": 8000
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.6932027339935303,
+      "eval_runtime": 166.3088,
+      "eval_samples_per_second": 48.103,
+      "eval_steps_per_second": 6.013,
+      "step": 8000
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 16000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 8419553771520000.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:144d97010bcda68f4d4b7151c2bcd2b1d3638f1a60032525b2ff2fdfd41fd895
+size 5201

training_metadata.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "trained_at": "2026-04-29T00:06:05.040980",
+  "base_model": "microsoft/codebert-base",
+  "train_samples": 16681,
+  "val_samples": 2275,
+  "best_f1": 0.6698,
+  "epochs_trained": 14,
+  "augmented": true,
+  "version": "v5_extended"
+}