dhananjay2912 committed on Apr 28

Commit

2f889a9

•

1 Parent(s): e6ad748

Upload folder using huggingface_hub

Browse files

Files changed (19) hide show

README.md +37 -0
added_tokens.json +3 -0
checkpoint-453/config.json +80 -0
checkpoint-453/model.safetensors +3 -0
checkpoint-453/optimizer.pt +3 -0
checkpoint-453/rng_state.pth +3 -0
checkpoint-453/scheduler.pt +3 -0
checkpoint-453/trainer_state.json +1657 -0
checkpoint-453/training_args.bin +3 -0
config.json +80 -0
model.safetensors +3 -0
runs/Apr28_03-13-55_r-dhananjay2912-deberta-section-classifier-el2khqmv-8c8a0-vi3xw/events.out.tfevents.1714274035.r-dhananjay2912-deberta-section-classifier-el2khqmv-8c8a0-vi3xw.61.0 +2 -2
runs/Apr28_03-13-55_r-dhananjay2912-deberta-section-classifier-el2khqmv-8c8a0-vi3xw/events.out.tfevents.1714274127.r-dhananjay2912-deberta-section-classifier-el2khqmv-8c8a0-vi3xw.61.1 +3 -0
special_tokens_map.json +15 -0
spm.model +3 -0
tokenizer.json +0 -0
tokenizer_config.json +58 -0
training_args.bin +3 -0
training_params.json +28 -0

README.md ADDED Viewed

	@@ -0,0 +1,37 @@

+---
+tags:
+- autotrain
+- text-classification
+widget:
+- text: "I love AutoTrain"
+datasets:
+- autotrain-9c20u-twasm/autotrain-data
+---
+# Model Trained Using AutoTrain
+- Problem type: Text Classification
+## Validation Metrics
+loss: 1.4913766384124756
+f1_macro: 0.28164367547346275
+f1_micro: 0.64
+f1_weighted: 0.5917376665887304
+precision_macro: 0.2705775014459225
+precision_micro: 0.64
+precision_weighted: 0.5802396761133604
+recall_macro: 0.3324350649350649
+recall_micro: 0.64
+recall_weighted: 0.64
+accuracy: 0.64

added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "[MASK]": 128000
+}

checkpoint-453/config.json ADDED Viewed

	@@ -0,0 +1,80 @@

+{
+  "_name_or_path": "microsoft/deberta-v3-base",
+  "_num_labels": 20,
+  "architectures": [
+    "DebertaV2ForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "ALLERGY",
+    "1": "ASSESSMENT",
+    "2": "CC",
+    "3": "DIAGNOSIS",
+    "4": "DISPOSITION",
+    "5": "EDCOURSE",
+    "6": "EXAM",
+    "7": "FAM/SOCHX",
+    "8": "GENHX",
+    "9": "GYNHX",
+    "10": "IMAGING",
+    "11": "IMMUNIZATIONS",
+    "12": "LABS",
+    "13": "MEDICATIONS",
+    "14": "OTHER_HISTORY",
+    "15": "PASTMEDICALHX",
+    "16": "PASTSURGICAL",
+    "17": "PLAN",
+    "18": "PROCEDURES",
+    "19": "ROS"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "ALLERGY": 0,
+    "ASSESSMENT": 1,
+    "CC": 2,
+    "DIAGNOSIS": 3,
+    "DISPOSITION": 4,
+    "EDCOURSE": 5,
+    "EXAM": 6,
+    "FAM/SOCHX": 7,
+    "GENHX": 8,
+    "GYNHX": 9,
+    "IMAGING": 10,
+    "IMMUNIZATIONS": 11,
+    "LABS": 12,
+    "MEDICATIONS": 13,
+    "OTHER_HISTORY": 14,
+    "PASTMEDICALHX": 15,
+    "PASTSURGICAL": 16,
+    "PLAN": 17,
+    "PROCEDURES": 18,
+    "ROS": 19
+  },
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 768,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.1",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}

checkpoint-453/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9978b903ae04e38477cfbe4c26b677f26202043973af526dd217734af02f25a8
+size 737774648

checkpoint-453/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b68088eab00b4814fe3f91902478e18eec2c80f743e1c211a3e8d1b1f9724b2
+size 1475669114

checkpoint-453/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3634414f112d61a01d7992b90ef33013ff7b8fc254dc6fa0804b27050b7420e7
+size 14244

checkpoint-453/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ba9abac2a9ca38a6ee08487e65596abd4f2e71ca0d947051295b5380a7e2cca2
+size 1064

checkpoint-453/trainer_state.json ADDED Viewed

	@@ -0,0 +1,1657 @@

+{
+  "best_metric": 1.4913766384124756,
+  "best_model_checkpoint": "autotrain-9c20u-twasm/checkpoint-453",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 453,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.013245033112582781,
+      "grad_norm": 3.3913655281066895,
+      "learning_rate": 2.173913043478261e-06,
+      "loss": 2.9946,
+      "step": 2
+    },
+    {
+      "epoch": 0.026490066225165563,
+      "grad_norm": 3.8051860332489014,
+      "learning_rate": 4.347826086956522e-06,
+      "loss": 3.0636,
+      "step": 4
+    },
+    {
+      "epoch": 0.039735099337748346,
+      "grad_norm": 4.160909652709961,
+      "learning_rate": 6.521739130434783e-06,
+      "loss": 3.0633,
+      "step": 6
+    },
+    {
+      "epoch": 0.052980132450331126,
+      "grad_norm": 3.9198436737060547,
+      "learning_rate": 8.695652173913044e-06,
+      "loss": 3.0431,
+      "step": 8
+    },
+    {
+      "epoch": 0.06622516556291391,
+      "grad_norm": 3.415198564529419,
+      "learning_rate": 1.0869565217391305e-05,
+      "loss": 3.0373,
+      "step": 10
+    },
+    {
+      "epoch": 0.07947019867549669,
+      "grad_norm": 4.432325839996338,
+      "learning_rate": 1.3043478260869566e-05,
+      "loss": 3.0171,
+      "step": 12
+    },
+    {
+      "epoch": 0.09271523178807947,
+      "grad_norm": 3.669222354888916,
+      "learning_rate": 1.5217391304347828e-05,
+      "loss": 3.0287,
+      "step": 14
+    },
+    {
+      "epoch": 0.10596026490066225,
+      "grad_norm": 4.237015724182129,
+      "learning_rate": 1.739130434782609e-05,
+      "loss": 2.9171,
+      "step": 16
+    },
+    {
+      "epoch": 0.11920529801324503,
+      "grad_norm": 4.314387798309326,
+      "learning_rate": 1.956521739130435e-05,
+      "loss": 2.9663,
+      "step": 18
+    },
+    {
+      "epoch": 0.13245033112582782,
+      "grad_norm": 4.890565872192383,
+      "learning_rate": 2.173913043478261e-05,
+      "loss": 2.885,
+      "step": 20
+    },
+    {
+      "epoch": 0.1456953642384106,
+      "grad_norm": 4.127910137176514,
+      "learning_rate": 2.391304347826087e-05,
+      "loss": 2.9042,
+      "step": 22
+    },
+    {
+      "epoch": 0.15894039735099338,
+      "grad_norm": 5.797154903411865,
+      "learning_rate": 2.608695652173913e-05,
+      "loss": 2.743,
+      "step": 24
+    },
+    {
+      "epoch": 0.17218543046357615,
+      "grad_norm": 4.610489845275879,
+      "learning_rate": 2.826086956521739e-05,
+      "loss": 2.727,
+      "step": 26
+    },
+    {
+      "epoch": 0.18543046357615894,
+      "grad_norm": 4.88078498840332,
+      "learning_rate": 3.0434782608695656e-05,
+      "loss": 2.6978,
+      "step": 28
+    },
+    {
+      "epoch": 0.1986754966887417,
+      "grad_norm": 4.7736334800720215,
+      "learning_rate": 3.260869565217392e-05,
+      "loss": 2.6417,
+      "step": 30
+    },
+    {
+      "epoch": 0.2119205298013245,
+      "grad_norm": 8.030829429626465,
+      "learning_rate": 3.478260869565218e-05,
+      "loss": 2.6245,
+      "step": 32
+    },
+    {
+      "epoch": 0.2251655629139073,
+      "grad_norm": 7.201488971710205,
+      "learning_rate": 3.695652173913043e-05,
+      "loss": 2.1068,
+      "step": 34
+    },
+    {
+      "epoch": 0.23841059602649006,
+      "grad_norm": 6.687085151672363,
+      "learning_rate": 3.804347826086957e-05,
+      "loss": 2.065,
+      "step": 36
+    },
+    {
+      "epoch": 0.25165562913907286,
+      "grad_norm": 7.2274675369262695,
+      "learning_rate": 4.021739130434783e-05,
+      "loss": 2.2102,
+      "step": 38
+    },
+    {
+      "epoch": 0.26490066225165565,
+      "grad_norm": 8.127673149108887,
+      "learning_rate": 4.239130434782609e-05,
+      "loss": 2.1585,
+      "step": 40
+    },
+    {
+      "epoch": 0.2781456953642384,
+      "grad_norm": 8.082962989807129,
+      "learning_rate": 4.456521739130435e-05,
+      "loss": 1.651,
+      "step": 42
+    },
+    {
+      "epoch": 0.2913907284768212,
+      "grad_norm": 9.228837013244629,
+      "learning_rate": 4.673913043478261e-05,
+      "loss": 2.5421,
+      "step": 44
+    },
+    {
+      "epoch": 0.304635761589404,
+      "grad_norm": 8.194953918457031,
+      "learning_rate": 4.891304347826087e-05,
+      "loss": 1.7752,
+      "step": 46
+    },
+    {
+      "epoch": 0.31788079470198677,
+      "grad_norm": 11.126862525939941,
+      "learning_rate": 4.987714987714988e-05,
+      "loss": 2.6941,
+      "step": 48
+    },
+    {
+      "epoch": 0.33112582781456956,
+      "grad_norm": 7.496665954589844,
+      "learning_rate": 4.963144963144963e-05,
+      "loss": 2.156,
+      "step": 50
+    },
+    {
+      "epoch": 0.3443708609271523,
+      "grad_norm": 6.483855724334717,
+      "learning_rate": 4.9385749385749387e-05,
+      "loss": 1.7352,
+      "step": 52
+    },
+    {
+      "epoch": 0.3576158940397351,
+      "grad_norm": 7.87312650680542,
+      "learning_rate": 4.914004914004915e-05,
+      "loss": 1.8533,
+      "step": 54
+    },
+    {
+      "epoch": 0.3708609271523179,
+      "grad_norm": 14.155879020690918,
+      "learning_rate": 4.8894348894348894e-05,
+      "loss": 1.9163,
+      "step": 56
+    },
+    {
+      "epoch": 0.3841059602649007,
+      "grad_norm": 6.899637699127197,
+      "learning_rate": 4.8648648648648654e-05,
+      "loss": 2.0497,
+      "step": 58
+    },
+    {
+      "epoch": 0.3973509933774834,
+      "grad_norm": 9.851097106933594,
+      "learning_rate": 4.840294840294841e-05,
+      "loss": 2.0549,
+      "step": 60
+    },
+    {
+      "epoch": 0.4105960264900662,
+      "grad_norm": 7.761284828186035,
+      "learning_rate": 4.8157248157248155e-05,
+      "loss": 2.4832,
+      "step": 62
+    },
+    {
+      "epoch": 0.423841059602649,
+      "grad_norm": 7.7907819747924805,
+      "learning_rate": 4.7911547911547915e-05,
+      "loss": 2.0239,
+      "step": 64
+    },
+    {
+      "epoch": 0.4370860927152318,
+      "grad_norm": 6.249843597412109,
+      "learning_rate": 4.766584766584767e-05,
+      "loss": 1.5607,
+      "step": 66
+    },
+    {
+      "epoch": 0.4503311258278146,
+      "grad_norm": 5.760603904724121,
+      "learning_rate": 4.742014742014742e-05,
+      "loss": 1.7551,
+      "step": 68
+    },
+    {
+      "epoch": 0.46357615894039733,
+      "grad_norm": 8.171866416931152,
+      "learning_rate": 4.7174447174447176e-05,
+      "loss": 1.7013,
+      "step": 70
+    },
+    {
+      "epoch": 0.4768211920529801,
+      "grad_norm": 7.343658924102783,
+      "learning_rate": 4.692874692874693e-05,
+      "loss": 2.1362,
+      "step": 72
+    },
+    {
+      "epoch": 0.4900662251655629,
+      "grad_norm": 11.477375030517578,
+      "learning_rate": 4.6683046683046684e-05,
+      "loss": 2.0896,
+      "step": 74
+    },
+    {
+      "epoch": 0.5033112582781457,
+      "grad_norm": 6.7531256675720215,
+      "learning_rate": 4.6437346437346444e-05,
+      "loss": 1.5969,
+      "step": 76
+    },
+    {
+      "epoch": 0.5165562913907285,
+      "grad_norm": 9.825312614440918,
+      "learning_rate": 4.619164619164619e-05,
+      "loss": 2.1228,
+      "step": 78
+    },
+    {
+      "epoch": 0.5298013245033113,
+      "grad_norm": 7.430478096008301,
+      "learning_rate": 4.594594594594595e-05,
+      "loss": 2.1018,
+      "step": 80
+    },
+    {
+      "epoch": 0.543046357615894,
+      "grad_norm": 5.134402275085449,
+      "learning_rate": 4.5700245700245705e-05,
+      "loss": 1.5301,
+      "step": 82
+    },
+    {
+      "epoch": 0.5562913907284768,
+      "grad_norm": 11.335017204284668,
+      "learning_rate": 4.545454545454546e-05,
+      "loss": 1.5022,
+      "step": 84
+    },
+    {
+      "epoch": 0.5695364238410596,
+      "grad_norm": 8.13528823852539,
+      "learning_rate": 4.520884520884521e-05,
+      "loss": 2.2219,
+      "step": 86
+    },
+    {
+      "epoch": 0.5827814569536424,
+      "grad_norm": 5.774362564086914,
+      "learning_rate": 4.4963144963144966e-05,
+      "loss": 1.5469,
+      "step": 88
+    },
+    {
+      "epoch": 0.5960264900662252,
+      "grad_norm": 10.505985260009766,
+      "learning_rate": 4.471744471744472e-05,
+      "loss": 2.1449,
+      "step": 90
+    },
+    {
+      "epoch": 0.609271523178808,
+      "grad_norm": 9.9073486328125,
+      "learning_rate": 4.447174447174447e-05,
+      "loss": 1.9911,
+      "step": 92
+    },
+    {
+      "epoch": 0.6225165562913907,
+      "grad_norm": 7.591387748718262,
+      "learning_rate": 4.422604422604423e-05,
+      "loss": 1.2767,
+      "step": 94
+    },
+    {
+      "epoch": 0.6357615894039735,
+      "grad_norm": 12.243816375732422,
+      "learning_rate": 4.398034398034398e-05,
+      "loss": 2.4575,
+      "step": 96
+    },
+    {
+      "epoch": 0.6490066225165563,
+      "grad_norm": 12.278913497924805,
+      "learning_rate": 4.373464373464374e-05,
+      "loss": 2.6426,
+      "step": 98
+    },
+    {
+      "epoch": 0.6622516556291391,
+      "grad_norm": 8.50022029876709,
+      "learning_rate": 4.348894348894349e-05,
+      "loss": 1.8184,
+      "step": 100
+    },
+    {
+      "epoch": 0.6754966887417219,
+      "grad_norm": 15.808201789855957,
+      "learning_rate": 4.324324324324325e-05,
+      "loss": 1.8794,
+      "step": 102
+    },
+    {
+      "epoch": 0.6887417218543046,
+      "grad_norm": 11.01276969909668,
+      "learning_rate": 4.2997542997543e-05,
+      "loss": 1.7045,
+      "step": 104
+    },
+    {
+      "epoch": 0.7019867549668874,
+      "grad_norm": 6.432559490203857,
+      "learning_rate": 4.2751842751842756e-05,
+      "loss": 1.3662,
+      "step": 106
+    },
+    {
+      "epoch": 0.7152317880794702,
+      "grad_norm": 12.68566608428955,
+      "learning_rate": 4.250614250614251e-05,
+      "loss": 1.0612,
+      "step": 108
+    },
+    {
+      "epoch": 0.7284768211920529,
+      "grad_norm": 7.320284843444824,
+      "learning_rate": 4.226044226044226e-05,
+      "loss": 1.6157,
+      "step": 110
+    },
+    {
+      "epoch": 0.7417218543046358,
+      "grad_norm": 9.558548927307129,
+      "learning_rate": 4.2014742014742017e-05,
+      "loss": 1.6096,
+      "step": 112
+    },
+    {
+      "epoch": 0.7549668874172185,
+      "grad_norm": 12.876482009887695,
+      "learning_rate": 4.176904176904177e-05,
+      "loss": 2.0925,
+      "step": 114
+    },
+    {
+      "epoch": 0.7682119205298014,
+      "grad_norm": 16.353004455566406,
+      "learning_rate": 4.1523341523341524e-05,
+      "loss": 2.4218,
+      "step": 116
+    },
+    {
+      "epoch": 0.7814569536423841,
+      "grad_norm": 8.213098526000977,
+      "learning_rate": 4.127764127764128e-05,
+      "loss": 1.4581,
+      "step": 118
+    },
+    {
+      "epoch": 0.7947019867549668,
+      "grad_norm": 7.289383888244629,
+      "learning_rate": 4.103194103194104e-05,
+      "loss": 1.3969,
+      "step": 120
+    },
+    {
+      "epoch": 0.8079470198675497,
+      "grad_norm": 12.726005554199219,
+      "learning_rate": 4.0786240786240785e-05,
+      "loss": 1.6086,
+      "step": 122
+    },
+    {
+      "epoch": 0.8211920529801324,
+      "grad_norm": 6.612705230712891,
+      "learning_rate": 4.0540540540540545e-05,
+      "loss": 1.228,
+      "step": 124
+    },
+    {
+      "epoch": 0.8344370860927153,
+      "grad_norm": 8.954344749450684,
+      "learning_rate": 4.02948402948403e-05,
+      "loss": 1.9795,
+      "step": 126
+    },
+    {
+      "epoch": 0.847682119205298,
+      "grad_norm": 14.46446418762207,
+      "learning_rate": 4.004914004914005e-05,
+      "loss": 1.4446,
+      "step": 128
+    },
+    {
+      "epoch": 0.8609271523178808,
+      "grad_norm": 6.220669746398926,
+      "learning_rate": 3.9803439803439806e-05,
+      "loss": 2.0856,
+      "step": 130
+    },
+    {
+      "epoch": 0.8741721854304636,
+      "grad_norm": 6.742675304412842,
+      "learning_rate": 3.955773955773956e-05,
+      "loss": 1.1761,
+      "step": 132
+    },
+    {
+      "epoch": 0.8874172185430463,
+      "grad_norm": 6.69525146484375,
+      "learning_rate": 3.9312039312039314e-05,
+      "loss": 1.2581,
+      "step": 134
+    },
+    {
+      "epoch": 0.9006622516556292,
+      "grad_norm": 5.910060405731201,
+      "learning_rate": 3.906633906633907e-05,
+      "loss": 1.1815,
+      "step": 136
+    },
+    {
+      "epoch": 0.9139072847682119,
+      "grad_norm": 5.171950817108154,
+      "learning_rate": 3.882063882063882e-05,
+      "loss": 0.9752,
+      "step": 138
+    },
+    {
+      "epoch": 0.9271523178807947,
+      "grad_norm": 7.012332916259766,
+      "learning_rate": 3.857493857493858e-05,
+      "loss": 1.7936,
+      "step": 140
+    },
+    {
+      "epoch": 0.9403973509933775,
+      "grad_norm": 7.54193639755249,
+      "learning_rate": 3.8329238329238335e-05,
+      "loss": 1.4808,
+      "step": 142
+    },
+    {
+      "epoch": 0.9536423841059603,
+      "grad_norm": 7.4099531173706055,
+      "learning_rate": 3.808353808353808e-05,
+      "loss": 1.2554,
+      "step": 144
+    },
+    {
+      "epoch": 0.9668874172185431,
+      "grad_norm": 10.860753059387207,
+      "learning_rate": 3.783783783783784e-05,
+      "loss": 2.0704,
+      "step": 146
+    },
+    {
+      "epoch": 0.9801324503311258,
+      "grad_norm": 6.87387752532959,
+      "learning_rate": 3.7592137592137596e-05,
+      "loss": 1.6014,
+      "step": 148
+    },
+    {
+      "epoch": 0.9933774834437086,
+      "grad_norm": 6.1860175132751465,
+      "learning_rate": 3.734643734643735e-05,
+      "loss": 0.9634,
+      "step": 150
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.43,
+      "eval_f1_macro": 0.10414486290241251,
+      "eval_f1_micro": 0.43,
+      "eval_f1_weighted": 0.3778887980009637,
+      "eval_loss": 2.039196252822876,
+      "eval_precision_macro": 0.12514957264957266,
+      "eval_precision_micro": 0.43,
+      "eval_precision_weighted": 0.4138803418803419,
+      "eval_recall_macro": 0.1286363636363636,
+      "eval_recall_micro": 0.43,
+      "eval_recall_weighted": 0.43,
+      "eval_runtime": 0.4069,
+      "eval_samples_per_second": 245.77,
+      "eval_steps_per_second": 17.204,
+      "step": 151
+    },
+    {
+      "epoch": 1.0066225165562914,
+      "grad_norm": 13.422128677368164,
+      "learning_rate": 3.71007371007371e-05,
+      "loss": 1.1134,
+      "step": 152
+    },
+    {
+      "epoch": 1.0198675496688743,
+      "grad_norm": 7.478858947753906,
+      "learning_rate": 3.685503685503686e-05,
+      "loss": 1.177,
+      "step": 154
+    },
+    {
+      "epoch": 1.033112582781457,
+      "grad_norm": 9.430647850036621,
+      "learning_rate": 3.660933660933661e-05,
+      "loss": 2.2911,
+      "step": 156
+    },
+    {
+      "epoch": 1.0463576158940397,
+      "grad_norm": 5.825343608856201,
+      "learning_rate": 3.6363636363636364e-05,
+      "loss": 1.2509,
+      "step": 158
+    },
+    {
+      "epoch": 1.0596026490066226,
+      "grad_norm": 9.491595268249512,
+      "learning_rate": 3.611793611793612e-05,
+      "loss": 1.3177,
+      "step": 160
+    },
+    {
+      "epoch": 1.0728476821192052,
+      "grad_norm": 5.076850891113281,
+      "learning_rate": 3.587223587223588e-05,
+      "loss": 0.9434,
+      "step": 162
+    },
+    {
+      "epoch": 1.086092715231788,
+      "grad_norm": 5.8091206550598145,
+      "learning_rate": 3.562653562653563e-05,
+      "loss": 1.4945,
+      "step": 164
+    },
+    {
+      "epoch": 1.099337748344371,
+      "grad_norm": 5.286921501159668,
+      "learning_rate": 3.538083538083538e-05,
+      "loss": 0.9087,
+      "step": 166
+    },
+    {
+      "epoch": 1.1125827814569536,
+      "grad_norm": 14.105121612548828,
+      "learning_rate": 3.513513513513514e-05,
+      "loss": 1.8953,
+      "step": 168
+    },
+    {
+      "epoch": 1.1258278145695364,
+      "grad_norm": 7.049529552459717,
+      "learning_rate": 3.488943488943489e-05,
+      "loss": 1.2879,
+      "step": 170
+    },
+    {
+      "epoch": 1.1390728476821192,
+      "grad_norm": 5.330937385559082,
+      "learning_rate": 3.4643734643734647e-05,
+      "loss": 1.0429,
+      "step": 172
+    },
+    {
+      "epoch": 1.152317880794702,
+      "grad_norm": 14.564863204956055,
+      "learning_rate": 3.43980343980344e-05,
+      "loss": 1.2875,
+      "step": 174
+    },
+    {
+      "epoch": 1.1655629139072847,
+      "grad_norm": 13.017091751098633,
+      "learning_rate": 3.4152334152334154e-05,
+      "loss": 1.0357,
+      "step": 176
+    },
+    {
+      "epoch": 1.1788079470198676,
+      "grad_norm": 13.703240394592285,
+      "learning_rate": 3.390663390663391e-05,
+      "loss": 1.5905,
+      "step": 178
+    },
+    {
+      "epoch": 1.1920529801324504,
+      "grad_norm": 7.464919567108154,
+      "learning_rate": 3.366093366093366e-05,
+      "loss": 1.5741,
+      "step": 180
+    },
+    {
+      "epoch": 1.205298013245033,
+      "grad_norm": 7.947140216827393,
+      "learning_rate": 3.3415233415233415e-05,
+      "loss": 1.845,
+      "step": 182
+    },
+    {
+      "epoch": 1.218543046357616,
+      "grad_norm": 6.652373790740967,
+      "learning_rate": 3.3169533169533175e-05,
+      "loss": 1.4041,
+      "step": 184
+    },
+    {
+      "epoch": 1.2317880794701987,
+      "grad_norm": 5.824616432189941,
+      "learning_rate": 3.292383292383293e-05,
+      "loss": 1.5873,
+      "step": 186
+    },
+    {
+      "epoch": 1.2450331125827814,
+      "grad_norm": 10.095503807067871,
+      "learning_rate": 3.2678132678132676e-05,
+      "loss": 1.4017,
+      "step": 188
+    },
+    {
+      "epoch": 1.2582781456953642,
+      "grad_norm": 5.2991766929626465,
+      "learning_rate": 3.2432432432432436e-05,
+      "loss": 1.395,
+      "step": 190
+    },
+    {
+      "epoch": 1.271523178807947,
+      "grad_norm": 4.981668472290039,
+      "learning_rate": 3.218673218673219e-05,
+      "loss": 1.4415,
+      "step": 192
+    },
+    {
+      "epoch": 1.2847682119205297,
+      "grad_norm": 8.634035110473633,
+      "learning_rate": 3.1941031941031943e-05,
+      "loss": 1.3291,
+      "step": 194
+    },
+    {
+      "epoch": 1.2980132450331126,
+      "grad_norm": 8.355801582336426,
+      "learning_rate": 3.16953316953317e-05,
+      "loss": 0.9761,
+      "step": 196
+    },
+    {
+      "epoch": 1.3112582781456954,
+      "grad_norm": null,
+      "learning_rate": 3.1572481572481574e-05,
+      "loss": 1.1352,
+      "step": 198
+    },
+    {
+      "epoch": 1.3245033112582782,
+      "grad_norm": 5.318390846252441,
+      "learning_rate": 3.132678132678133e-05,
+      "loss": 1.1314,
+      "step": 200
+    },
+    {
+      "epoch": 1.3377483443708609,
+      "grad_norm": 7.791065692901611,
+      "learning_rate": 3.108108108108108e-05,
+      "loss": 1.5313,
+      "step": 202
+    },
+    {
+      "epoch": 1.3509933774834437,
+      "grad_norm": 21.741308212280273,
+      "learning_rate": 3.083538083538084e-05,
+      "loss": 1.7144,
+      "step": 204
+    },
+    {
+      "epoch": 1.3642384105960264,
+      "grad_norm": 14.097997665405273,
+      "learning_rate": 3.058968058968059e-05,
+      "loss": 1.749,
+      "step": 206
+    },
+    {
+      "epoch": 1.3774834437086092,
+      "grad_norm": 6.286694526672363,
+      "learning_rate": 3.0343980343980342e-05,
+      "loss": 1.3443,
+      "step": 208
+    },
+    {
+      "epoch": 1.390728476821192,
+      "grad_norm": 7.453071117401123,
+      "learning_rate": 3.0098280098280103e-05,
+      "loss": 0.85,
+      "step": 210
+    },
+    {
+      "epoch": 1.403973509933775,
+      "grad_norm": 10.761088371276855,
+      "learning_rate": 2.9852579852579853e-05,
+      "loss": 0.8886,
+      "step": 212
+    },
+    {
+      "epoch": 1.4172185430463577,
+      "grad_norm": 5.939634799957275,
+      "learning_rate": 2.9606879606879607e-05,
+      "loss": 1.2804,
+      "step": 214
+    },
+    {
+      "epoch": 1.4304635761589404,
+      "grad_norm": 9.565838813781738,
+      "learning_rate": 2.9361179361179364e-05,
+      "loss": 1.5924,
+      "step": 216
+    },
+    {
+      "epoch": 1.4437086092715232,
+      "grad_norm": 10.187567710876465,
+      "learning_rate": 2.9115479115479117e-05,
+      "loss": 1.3317,
+      "step": 218
+    },
+    {
+      "epoch": 1.4569536423841059,
+      "grad_norm": 8.318490028381348,
+      "learning_rate": 2.8869778869778868e-05,
+      "loss": 1.617,
+      "step": 220
+    },
+    {
+      "epoch": 1.4701986754966887,
+      "grad_norm": 7.841922283172607,
+      "learning_rate": 2.8624078624078625e-05,
+      "loss": 1.463,
+      "step": 222
+    },
+    {
+      "epoch": 1.4834437086092715,
+      "grad_norm": 6.505579471588135,
+      "learning_rate": 2.8378378378378378e-05,
+      "loss": 1.123,
+      "step": 224
+    },
+    {
+      "epoch": 1.4966887417218544,
+      "grad_norm": 5.509284496307373,
+      "learning_rate": 2.8132678132678135e-05,
+      "loss": 1.0619,
+      "step": 226
+    },
+    {
+      "epoch": 1.5099337748344372,
+      "grad_norm": 4.9719367027282715,
+      "learning_rate": 2.788697788697789e-05,
+      "loss": 0.6785,
+      "step": 228
+    },
+    {
+      "epoch": 1.5231788079470199,
+      "grad_norm": 7.88023567199707,
+      "learning_rate": 2.764127764127764e-05,
+      "loss": 1.1481,
+      "step": 230
+    },
+    {
+      "epoch": 1.5364238410596025,
+      "grad_norm": 5.841632843017578,
+      "learning_rate": 2.73955773955774e-05,
+      "loss": 0.7702,
+      "step": 232
+    },
+    {
+      "epoch": 1.5496688741721854,
+      "grad_norm": 8.680448532104492,
+      "learning_rate": 2.714987714987715e-05,
+      "loss": 1.4025,
+      "step": 234
+    },
+    {
+      "epoch": 1.5629139072847682,
+      "grad_norm": 5.259287357330322,
+      "learning_rate": 2.6904176904176904e-05,
+      "loss": 0.8707,
+      "step": 236
+    },
+    {
+      "epoch": 1.576158940397351,
+      "grad_norm": 5.991212368011475,
+      "learning_rate": 2.665847665847666e-05,
+      "loss": 0.5955,
+      "step": 238
+    },
+    {
+      "epoch": 1.589403973509934,
+      "grad_norm": 11.001547813415527,
+      "learning_rate": 2.6412776412776414e-05,
+      "loss": 1.5866,
+      "step": 240
+    },
+    {
+      "epoch": 1.6026490066225165,
+      "grad_norm": 5.934292316436768,
+      "learning_rate": 2.616707616707617e-05,
+      "loss": 1.3472,
+      "step": 242
+    },
+    {
+      "epoch": 1.6158940397350994,
+      "grad_norm": 16.11164665222168,
+      "learning_rate": 2.5921375921375925e-05,
+      "loss": 1.2663,
+      "step": 244
+    },
+    {
+      "epoch": 1.629139072847682,
+      "grad_norm": 10.690239906311035,
+      "learning_rate": 2.5675675675675675e-05,
+      "loss": 1.5754,
+      "step": 246
+    },
+    {
+      "epoch": 1.6423841059602649,
+      "grad_norm": 7.545533657073975,
+      "learning_rate": 2.5429975429975432e-05,
+      "loss": 0.9722,
+      "step": 248
+    },
+    {
+      "epoch": 1.6556291390728477,
+      "grad_norm": 7.82634162902832,
+      "learning_rate": 2.5184275184275186e-05,
+      "loss": 1.8525,
+      "step": 250
+    },
+    {
+      "epoch": 1.6688741721854305,
+      "grad_norm": 11.477102279663086,
+      "learning_rate": 2.493857493857494e-05,
+      "loss": 0.9904,
+      "step": 252
+    },
+    {
+      "epoch": 1.6821192052980134,
+      "grad_norm": 8.056424140930176,
+      "learning_rate": 2.4692874692874693e-05,
+      "loss": 1.6071,
+      "step": 254
+    },
+    {
+      "epoch": 1.695364238410596,
+      "grad_norm": 8.872392654418945,
+      "learning_rate": 2.4447174447174447e-05,
+      "loss": 1.8732,
+      "step": 256
+    },
+    {
+      "epoch": 1.7086092715231787,
+      "grad_norm": 8.307268142700195,
+      "learning_rate": 2.4201474201474204e-05,
+      "loss": 1.2843,
+      "step": 258
+    },
+    {
+      "epoch": 1.7218543046357615,
+      "grad_norm": 8.889561653137207,
+      "learning_rate": 2.3955773955773958e-05,
+      "loss": 1.8855,
+      "step": 260
+    },
+    {
+      "epoch": 1.7350993377483444,
+      "grad_norm": 7.262237071990967,
+      "learning_rate": 2.371007371007371e-05,
+      "loss": 0.6363,
+      "step": 262
+    },
+    {
+      "epoch": 1.7483443708609272,
+      "grad_norm": 9.860976219177246,
+      "learning_rate": 2.3464373464373465e-05,
+      "loss": 0.9588,
+      "step": 264
+    },
+    {
+      "epoch": 1.76158940397351,
+      "grad_norm": 8.475176811218262,
+      "learning_rate": 2.3218673218673222e-05,
+      "loss": 1.4638,
+      "step": 266
+    },
+    {
+      "epoch": 1.7748344370860927,
+      "grad_norm": 6.681599140167236,
+      "learning_rate": 2.2972972972972976e-05,
+      "loss": 0.9692,
+      "step": 268
+    },
+    {
+      "epoch": 1.7880794701986755,
+      "grad_norm": 9.975168228149414,
+      "learning_rate": 2.272727272727273e-05,
+      "loss": 2.0131,
+      "step": 270
+    },
+    {
+      "epoch": 1.8013245033112582,
+      "grad_norm": 9.302199363708496,
+      "learning_rate": 2.2481572481572483e-05,
+      "loss": 1.4159,
+      "step": 272
+    },
+    {
+      "epoch": 1.814569536423841,
+      "grad_norm": 10.039788246154785,
+      "learning_rate": 2.2235872235872237e-05,
+      "loss": 1.5486,
+      "step": 274
+    },
+    {
+      "epoch": 1.8278145695364238,
+      "grad_norm": 7.697207927703857,
+      "learning_rate": 2.199017199017199e-05,
+      "loss": 1.0797,
+      "step": 276
+    },
+    {
+      "epoch": 1.8410596026490067,
+      "grad_norm": 7.76730489730835,
+      "learning_rate": 2.1744471744471744e-05,
+      "loss": 1.1921,
+      "step": 278
+    },
+    {
+      "epoch": 1.8543046357615895,
+      "grad_norm": 7.227729320526123,
+      "learning_rate": 2.14987714987715e-05,
+      "loss": 1.6573,
+      "step": 280
+    },
+    {
+      "epoch": 1.8675496688741722,
+      "grad_norm": 6.351704120635986,
+      "learning_rate": 2.1253071253071255e-05,
+      "loss": 0.6416,
+      "step": 282
+    },
+    {
+      "epoch": 1.8807947019867548,
+      "grad_norm": 7.4418511390686035,
+      "learning_rate": 2.1007371007371008e-05,
+      "loss": 1.7128,
+      "step": 284
+    },
+    {
+      "epoch": 1.8940397350993377,
+      "grad_norm": 9.884121894836426,
+      "learning_rate": 2.0761670761670762e-05,
+      "loss": 2.3348,
+      "step": 286
+    },
+    {
+      "epoch": 1.9072847682119205,
+      "grad_norm": 11.193192481994629,
+      "learning_rate": 2.051597051597052e-05,
+      "loss": 0.9744,
+      "step": 288
+    },
+    {
+      "epoch": 1.9205298013245033,
+      "grad_norm": 6.8167829513549805,
+      "learning_rate": 2.0270270270270273e-05,
+      "loss": 1.2415,
+      "step": 290
+    },
+    {
+      "epoch": 1.9337748344370862,
+      "grad_norm": 6.478491306304932,
+      "learning_rate": 2.0024570024570026e-05,
+      "loss": 1.1914,
+      "step": 292
+    },
+    {
+      "epoch": 1.9470198675496688,
+      "grad_norm": 7.969542026519775,
+      "learning_rate": 1.977886977886978e-05,
+      "loss": 1.5221,
+      "step": 294
+    },
+    {
+      "epoch": 1.9602649006622517,
+      "grad_norm": 7.827632427215576,
+      "learning_rate": 1.9533169533169534e-05,
+      "loss": 0.7076,
+      "step": 296
+    },
+    {
+      "epoch": 1.9735099337748343,
+      "grad_norm": 5.793867111206055,
+      "learning_rate": 1.928746928746929e-05,
+      "loss": 1.1292,
+      "step": 298
+    },
+    {
+      "epoch": 1.9867549668874172,
+      "grad_norm": 7.288516998291016,
+      "learning_rate": 1.904176904176904e-05,
+      "loss": 0.8474,
+      "step": 300
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 19.54204559326172,
+      "learning_rate": 1.8796068796068798e-05,
+      "loss": 0.8084,
+      "step": 302
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.52,
+      "eval_f1_macro": 0.16279761904761905,
+      "eval_f1_micro": 0.52,
+      "eval_f1_weighted": 0.4582380952380952,
+      "eval_loss": 1.6385834217071533,
+      "eval_precision_macro": 0.17940422322775265,
+      "eval_precision_micro": 0.52,
+      "eval_precision_weighted": 0.47573152337858227,
+      "eval_recall_macro": 0.2141883116883117,
+      "eval_recall_micro": 0.52,
+      "eval_recall_weighted": 0.52,
+      "eval_runtime": 0.3788,
+      "eval_samples_per_second": 263.985,
+      "eval_steps_per_second": 18.479,
+      "step": 302
+    },
+    {
+      "epoch": 2.013245033112583,
+      "grad_norm": 3.1326494216918945,
+      "learning_rate": 1.855036855036855e-05,
+      "loss": 0.4869,
+      "step": 304
+    },
+    {
+      "epoch": 2.0264900662251657,
+      "grad_norm": 7.844410419464111,
+      "learning_rate": 1.8304668304668305e-05,
+      "loss": 1.0237,
+      "step": 306
+    },
+    {
+      "epoch": 2.0397350993377485,
+      "grad_norm": 8.277088165283203,
+      "learning_rate": 1.805896805896806e-05,
+      "loss": 0.8911,
+      "step": 308
+    },
+    {
+      "epoch": 2.052980132450331,
+      "grad_norm": 4.819505214691162,
+      "learning_rate": 1.7813267813267816e-05,
+      "loss": 0.6859,
+      "step": 310
+    },
+    {
+      "epoch": 2.066225165562914,
+      "grad_norm": 7.4773359298706055,
+      "learning_rate": 1.756756756756757e-05,
+      "loss": 1.0567,
+      "step": 312
+    },
+    {
+      "epoch": 2.0794701986754967,
+      "grad_norm": 8.229156494140625,
+      "learning_rate": 1.7321867321867323e-05,
+      "loss": 1.2833,
+      "step": 314
+    },
+    {
+      "epoch": 2.0927152317880795,
+      "grad_norm": 11.932225227355957,
+      "learning_rate": 1.7076167076167077e-05,
+      "loss": 1.2839,
+      "step": 316
+    },
+    {
+      "epoch": 2.1059602649006623,
+      "grad_norm": 4.1798095703125,
+      "learning_rate": 1.683046683046683e-05,
+      "loss": 0.335,
+      "step": 318
+    },
+    {
+      "epoch": 2.119205298013245,
+      "grad_norm": 4.319066524505615,
+      "learning_rate": 1.6584766584766588e-05,
+      "loss": 0.3695,
+      "step": 320
+    },
+    {
+      "epoch": 2.1324503311258276,
+      "grad_norm": 4.868326187133789,
+      "learning_rate": 1.6339066339066338e-05,
+      "loss": 0.5056,
+      "step": 322
+    },
+    {
+      "epoch": 2.1456953642384105,
+      "grad_norm": 11.324278831481934,
+      "learning_rate": 1.6093366093366095e-05,
+      "loss": 1.6924,
+      "step": 324
+    },
+    {
+      "epoch": 2.1589403973509933,
+      "grad_norm": 7.622723579406738,
+      "learning_rate": 1.584766584766585e-05,
+      "loss": 1.1639,
+      "step": 326
+    },
+    {
+      "epoch": 2.172185430463576,
+      "grad_norm": 9.650656700134277,
+      "learning_rate": 1.5601965601965606e-05,
+      "loss": 1.449,
+      "step": 328
+    },
+    {
+      "epoch": 2.185430463576159,
+      "grad_norm": 8.743961334228516,
+      "learning_rate": 1.5356265356265356e-05,
+      "loss": 1.319,
+      "step": 330
+    },
+    {
+      "epoch": 2.198675496688742,
+      "grad_norm": 8.732221603393555,
+      "learning_rate": 1.5110565110565111e-05,
+      "loss": 1.2402,
+      "step": 332
+    },
+    {
+      "epoch": 2.2119205298013247,
+      "grad_norm": 7.377650737762451,
+      "learning_rate": 1.4864864864864867e-05,
+      "loss": 1.0142,
+      "step": 334
+    },
+    {
+      "epoch": 2.225165562913907,
+      "grad_norm": 7.734024524688721,
+      "learning_rate": 1.4619164619164619e-05,
+      "loss": 1.0699,
+      "step": 336
+    },
+    {
+      "epoch": 2.23841059602649,
+      "grad_norm": 6.139077663421631,
+      "learning_rate": 1.4373464373464374e-05,
+      "loss": 0.5206,
+      "step": 338
+    },
+    {
+      "epoch": 2.251655629139073,
+      "grad_norm": 7.571850299835205,
+      "learning_rate": 1.412776412776413e-05,
+      "loss": 0.8841,
+      "step": 340
+    },
+    {
+      "epoch": 2.2649006622516556,
+      "grad_norm": 9.102130889892578,
+      "learning_rate": 1.3882063882063885e-05,
+      "loss": 0.6576,
+      "step": 342
+    },
+    {
+      "epoch": 2.2781456953642385,
+      "grad_norm": 8.144769668579102,
+      "learning_rate": 1.3636363636363637e-05,
+      "loss": 0.7361,
+      "step": 344
+    },
+    {
+      "epoch": 2.2913907284768213,
+      "grad_norm": 9.98493480682373,
+      "learning_rate": 1.339066339066339e-05,
+      "loss": 1.1676,
+      "step": 346
+    },
+    {
+      "epoch": 2.304635761589404,
+      "grad_norm": 10.540783882141113,
+      "learning_rate": 1.3144963144963146e-05,
+      "loss": 1.3709,
+      "step": 348
+    },
+    {
+      "epoch": 2.3178807947019866,
+      "grad_norm": 10.3671293258667,
+      "learning_rate": 1.2899262899262901e-05,
+      "loss": 0.8397,
+      "step": 350
+    },
+    {
+      "epoch": 2.3311258278145695,
+      "grad_norm": 4.67352819442749,
+      "learning_rate": 1.2653562653562653e-05,
+      "loss": 0.539,
+      "step": 352
+    },
+    {
+      "epoch": 2.3443708609271523,
+      "grad_norm": 9.957860946655273,
+      "learning_rate": 1.2407862407862408e-05,
+      "loss": 1.4744,
+      "step": 354
+    },
+    {
+      "epoch": 2.357615894039735,
+      "grad_norm": 8.535019874572754,
+      "learning_rate": 1.2162162162162164e-05,
+      "loss": 1.3841,
+      "step": 356
+    },
+    {
+      "epoch": 2.370860927152318,
+      "grad_norm": 8.11552906036377,
+      "learning_rate": 1.1916461916461917e-05,
+      "loss": 1.1301,
+      "step": 358
+    },
+    {
+      "epoch": 2.384105960264901,
+      "grad_norm": 7.197366237640381,
+      "learning_rate": 1.1670761670761671e-05,
+      "loss": 0.6156,
+      "step": 360
+    },
+    {
+      "epoch": 2.3973509933774833,
+      "grad_norm": 4.6600022315979,
+      "learning_rate": 1.1425061425061426e-05,
+      "loss": 0.7303,
+      "step": 362
+    },
+    {
+      "epoch": 2.410596026490066,
+      "grad_norm": 6.252689838409424,
+      "learning_rate": 1.117936117936118e-05,
+      "loss": 0.9769,
+      "step": 364
+    },
+    {
+      "epoch": 2.423841059602649,
+      "grad_norm": 9.90804672241211,
+      "learning_rate": 1.0933660933660935e-05,
+      "loss": 1.093,
+      "step": 366
+    },
+    {
+      "epoch": 2.437086092715232,
+      "grad_norm": 9.742344856262207,
+      "learning_rate": 1.0687960687960689e-05,
+      "loss": 1.0279,
+      "step": 368
+    },
+    {
+      "epoch": 2.4503311258278146,
+      "grad_norm": 9.368986129760742,
+      "learning_rate": 1.0442260442260443e-05,
+      "loss": 1.2061,
+      "step": 370
+    },
+    {
+      "epoch": 2.4635761589403975,
+      "grad_norm": 6.378131866455078,
+      "learning_rate": 1.0196560196560196e-05,
+      "loss": 1.0066,
+      "step": 372
+    },
+    {
+      "epoch": 2.47682119205298,
+      "grad_norm": 7.304665565490723,
+      "learning_rate": 9.950859950859952e-06,
+      "loss": 2.2284,
+      "step": 374
+    },
+    {
+      "epoch": 2.4900662251655628,
+      "grad_norm": 5.73162317276001,
+      "learning_rate": 9.705159705159705e-06,
+      "loss": 0.4927,
+      "step": 376
+    },
+    {
+      "epoch": 2.5033112582781456,
+      "grad_norm": 9.693008422851562,
+      "learning_rate": 9.45945945945946e-06,
+      "loss": 0.9368,
+      "step": 378
+    },
+    {
+      "epoch": 2.5165562913907285,
+      "grad_norm": 7.258613586425781,
+      "learning_rate": 9.213759213759214e-06,
+      "loss": 0.5097,
+      "step": 380
+    },
+    {
+      "epoch": 2.5298013245033113,
+      "grad_norm": 6.431227684020996,
+      "learning_rate": 8.96805896805897e-06,
+      "loss": 0.9039,
+      "step": 382
+    },
+    {
+      "epoch": 2.543046357615894,
+      "grad_norm": 8.578726768493652,
+      "learning_rate": 8.722358722358723e-06,
+      "loss": 0.8253,
+      "step": 384
+    },
+    {
+      "epoch": 2.556291390728477,
+      "grad_norm": 5.389899730682373,
+      "learning_rate": 8.476658476658477e-06,
+      "loss": 0.4513,
+      "step": 386
+    },
+    {
+      "epoch": 2.5695364238410594,
+      "grad_norm": 10.525187492370605,
+      "learning_rate": 8.230958230958232e-06,
+      "loss": 1.1571,
+      "step": 388
+    },
+    {
+      "epoch": 2.5827814569536423,
+      "grad_norm": 5.4934186935424805,
+      "learning_rate": 7.985257985257986e-06,
+      "loss": 0.7821,
+      "step": 390
+    },
+    {
+      "epoch": 2.596026490066225,
+      "grad_norm": 9.986252784729004,
+      "learning_rate": 7.73955773955774e-06,
+      "loss": 0.8019,
+      "step": 392
+    },
+    {
+      "epoch": 2.609271523178808,
+      "grad_norm": 9.1427001953125,
+      "learning_rate": 7.493857493857494e-06,
+      "loss": 1.349,
+      "step": 394
+    },
+    {
+      "epoch": 2.622516556291391,
+      "grad_norm": 8.039468765258789,
+      "learning_rate": 7.2481572481572485e-06,
+      "loss": 1.0867,
+      "step": 396
+    },
+    {
+      "epoch": 2.6357615894039736,
+      "grad_norm": 9.093656539916992,
+      "learning_rate": 7.002457002457002e-06,
+      "loss": 1.5422,
+      "step": 398
+    },
+    {
+      "epoch": 2.6490066225165565,
+      "grad_norm": 13.18973445892334,
+      "learning_rate": 6.7567567567567575e-06,
+      "loss": 0.4601,
+      "step": 400
+    },
+    {
+      "epoch": 2.662251655629139,
+      "grad_norm": 9.986296653747559,
+      "learning_rate": 6.511056511056511e-06,
+      "loss": 1.0286,
+      "step": 402
+    },
+    {
+      "epoch": 2.6754966887417218,
+      "grad_norm": 9.373101234436035,
+      "learning_rate": 6.2653562653562665e-06,
+      "loss": 0.8767,
+      "step": 404
+    },
+    {
+      "epoch": 2.6887417218543046,
+      "grad_norm": 9.866012573242188,
+      "learning_rate": 6.019656019656019e-06,
+      "loss": 1.1937,
+      "step": 406
+    },
+    {
+      "epoch": 2.7019867549668874,
+      "grad_norm": 6.277444839477539,
+      "learning_rate": 5.773955773955774e-06,
+      "loss": 1.1616,
+      "step": 408
+    },
+    {
+      "epoch": 2.7152317880794703,
+      "grad_norm": 5.679011344909668,
+      "learning_rate": 5.528255528255528e-06,
+      "loss": 0.7223,
+      "step": 410
+    },
+    {
+      "epoch": 2.7284768211920527,
+      "grad_norm": 4.74644660949707,
+      "learning_rate": 5.282555282555283e-06,
+      "loss": 0.5082,
+      "step": 412
+    },
+    {
+      "epoch": 2.741721854304636,
+      "grad_norm": 13.121922492980957,
+      "learning_rate": 5.036855036855037e-06,
+      "loss": 1.5552,
+      "step": 414
+    },
+    {
+      "epoch": 2.7549668874172184,
+      "grad_norm": 8.272148132324219,
+      "learning_rate": 4.791154791154792e-06,
+      "loss": 0.9687,
+      "step": 416
+    },
+    {
+      "epoch": 2.7682119205298013,
+      "grad_norm": 7.133453369140625,
+      "learning_rate": 4.5454545454545455e-06,
+      "loss": 0.5568,
+      "step": 418
+    },
+    {
+      "epoch": 2.781456953642384,
+      "grad_norm": 5.385309219360352,
+      "learning_rate": 4.2997542997543e-06,
+      "loss": 0.5379,
+      "step": 420
+    },
+    {
+      "epoch": 2.794701986754967,
+      "grad_norm": 9.613791465759277,
+      "learning_rate": 4.0540540540540545e-06,
+      "loss": 1.3592,
+      "step": 422
+    },
+    {
+      "epoch": 2.80794701986755,
+      "grad_norm": 8.493631362915039,
+      "learning_rate": 3.8083538083538086e-06,
+      "loss": 1.1874,
+      "step": 424
+    },
+    {
+      "epoch": 2.821192052980132,
+      "grad_norm": 5.972334861755371,
+      "learning_rate": 3.562653562653563e-06,
+      "loss": 0.8684,
+      "step": 426
+    },
+    {
+      "epoch": 2.8344370860927155,
+      "grad_norm": 10.33562183380127,
+      "learning_rate": 3.3169533169533168e-06,
+      "loss": 1.0385,
+      "step": 428
+    },
+    {
+      "epoch": 2.847682119205298,
+      "grad_norm": 4.6230363845825195,
+      "learning_rate": 3.0712530712530717e-06,
+      "loss": 0.8805,
+      "step": 430
+    },
+    {
+      "epoch": 2.8609271523178808,
+      "grad_norm": 16.691070556640625,
+      "learning_rate": 2.9484029484029485e-06,
+      "loss": 1.494,
+      "step": 432
+    },
+    {
+      "epoch": 2.8741721854304636,
+      "grad_norm": 6.581092834472656,
+      "learning_rate": 2.702702702702703e-06,
+      "loss": 0.4563,
+      "step": 434
+    },
+    {
+      "epoch": 2.8874172185430464,
+      "grad_norm": 8.806626319885254,
+      "learning_rate": 2.457002457002457e-06,
+      "loss": 1.3625,
+      "step": 436
+    },
+    {
+      "epoch": 2.9006622516556293,
+      "grad_norm": 5.90773344039917,
+      "learning_rate": 2.211302211302211e-06,
+      "loss": 1.1598,
+      "step": 438
+    },
+    {
+      "epoch": 2.9139072847682117,
+      "grad_norm": 7.406730651855469,
+      "learning_rate": 1.9656019656019657e-06,
+      "loss": 0.7975,
+      "step": 440
+    },
+    {
+      "epoch": 2.9271523178807946,
+      "grad_norm": 5.467130184173584,
+      "learning_rate": 1.71990171990172e-06,
+      "loss": 0.4307,
+      "step": 442
+    },
+    {
+      "epoch": 2.9403973509933774,
+      "grad_norm": 7.527857780456543,
+      "learning_rate": 1.4742014742014743e-06,
+      "loss": 0.6769,
+      "step": 444
+    },
+    {
+      "epoch": 2.9536423841059603,
+      "grad_norm": 11.855218887329102,
+      "learning_rate": 1.2285012285012285e-06,
+      "loss": 1.4558,
+      "step": 446
+    },
+    {
+      "epoch": 2.966887417218543,
+      "grad_norm": 9.057221412658691,
+      "learning_rate": 9.828009828009828e-07,
+      "loss": 1.6682,
+      "step": 448
+    },
+    {
+      "epoch": 2.980132450331126,
+      "grad_norm": 8.127178192138672,
+      "learning_rate": 7.371007371007371e-07,
+      "loss": 0.9139,
+      "step": 450
+    },
+    {
+      "epoch": 2.993377483443709,
+      "grad_norm": 7.9965128898620605,
+      "learning_rate": 4.914004914004914e-07,
+      "loss": 0.7603,
+      "step": 452
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.64,
+      "eval_f1_macro": 0.28164367547346275,
+      "eval_f1_micro": 0.64,
+      "eval_f1_weighted": 0.5917376665887304,
+      "eval_loss": 1.4913766384124756,
+      "eval_precision_macro": 0.2705775014459225,
+      "eval_precision_micro": 0.64,
+      "eval_precision_weighted": 0.5802396761133604,
+      "eval_recall_macro": 0.3324350649350649,
+      "eval_recall_micro": 0.64,
+      "eval_recall_weighted": 0.64,
+      "eval_runtime": 0.4059,
+      "eval_samples_per_second": 246.346,
+      "eval_steps_per_second": 17.244,
+      "step": 453
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 453,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 237039835640832.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-453/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:28b30d78c7419f944218ae57eb5e536b3505745c5786856999a5e4567e999875
+size 5048

config.json ADDED Viewed

	@@ -0,0 +1,80 @@

+{
+  "_name_or_path": "microsoft/deberta-v3-base",
+  "_num_labels": 20,
+  "architectures": [
+    "DebertaV2ForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "ALLERGY",
+    "1": "ASSESSMENT",
+    "2": "CC",
+    "3": "DIAGNOSIS",
+    "4": "DISPOSITION",
+    "5": "EDCOURSE",
+    "6": "EXAM",
+    "7": "FAM/SOCHX",
+    "8": "GENHX",
+    "9": "GYNHX",
+    "10": "IMAGING",
+    "11": "IMMUNIZATIONS",
+    "12": "LABS",
+    "13": "MEDICATIONS",
+    "14": "OTHER_HISTORY",
+    "15": "PASTMEDICALHX",
+    "16": "PASTSURGICAL",
+    "17": "PLAN",
+    "18": "PROCEDURES",
+    "19": "ROS"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "ALLERGY": 0,
+    "ASSESSMENT": 1,
+    "CC": 2,
+    "DIAGNOSIS": 3,
+    "DISPOSITION": 4,
+    "EDCOURSE": 5,
+    "EXAM": 6,
+    "FAM/SOCHX": 7,
+    "GENHX": 8,
+    "GYNHX": 9,
+    "IMAGING": 10,
+    "IMMUNIZATIONS": 11,
+    "LABS": 12,
+    "MEDICATIONS": 13,
+    "OTHER_HISTORY": 14,
+    "PASTMEDICALHX": 15,
+    "PASTSURGICAL": 16,
+    "PLAN": 17,
+    "PROCEDURES": 18,
+    "ROS": 19
+  },
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 768,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.1",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9978b903ae04e38477cfbe4c26b677f26202043973af526dd217734af02f25a8
+size 737774648

runs/Apr28_03-13-55_r-dhananjay2912-deberta-section-classifier-el2khqmv-8c8a0-vi3xw/events.out.tfevents.1714274035.r-dhananjay2912-deberta-section-classifier-el2khqmv-8c8a0-vi3xw.61.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7f6c3e746d4d1d25aa051472672474e43074ad0e12a02848c7a5dbd90824eb4
-size 5755

 version https://git-lfs.github.com/spec/v1
+oid sha256:6933b6b4a27dedeb2b8eb01af87227854753cfbf93cea74d8422f6ee9a72244d
+size 56042

runs/Apr28_03-13-55_r-dhananjay2912-deberta-section-classifier-el2khqmv-8c8a0-vi3xw/events.out.tfevents.1714274127.r-dhananjay2912-deberta-section-classifier-el2khqmv-8c8a0-vi3xw.61.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:df63b97ee7735f56a342cf76e5e141e50bae4ae88f07528e38841cc018bb28b8
+size 921

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

spm.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
+size 2464616

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128000": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "sp_model_kwargs": {},
+  "split_by_punct": false,
+  "tokenizer_class": "DebertaV2Tokenizer",
+  "unk_token": "[UNK]",
+  "vocab_type": "spm"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:28b30d78c7419f944218ae57eb5e536b3505745c5786856999a5e4567e999875
+size 5048

training_params.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+    "data_path": "autotrain-9c20u-twasm/autotrain-data",
+    "model": "microsoft/deberta-v3-base",
+    "lr": 5e-05,
+    "epochs": 3,
+    "max_seq_length": 128,
+    "batch_size": 8,
+    "warmup_ratio": 0.1,
+    "gradient_accumulation": 1,
+    "optimizer": "adamw_torch",
+    "scheduler": "linear",
+    "weight_decay": 0.0,
+    "max_grad_norm": 1.0,
+    "seed": 42,
+    "train_split": "train",
+    "valid_split": "validation",
+    "text_column": "autotrain_text",
+    "target_column": "autotrain_label",
+    "logging_steps": -1,
+    "project_name": "autotrain-9c20u-twasm",
+    "auto_find_batch_size": false,
+    "mixed_precision": "fp16",
+    "save_total_limit": 1,
+    "push_to_hub": true,
+    "evaluation_strategy": "epoch",
+    "username": "dhananjay2912",
+    "log": "tensorboard"
+}