Remove training checkpoints to reduce model size

Browse files

Files changed (14) hide show

checkpoint-1179/config.json +0 -71
checkpoint-1179/model.safetensors +0 -3
checkpoint-1179/optimizer.pt +0 -3
checkpoint-1179/rng_state.pth +0 -3
checkpoint-1179/scheduler.pt +0 -3
checkpoint-1179/trainer_state.json +0 -970
checkpoint-1179/training_args.bin +0 -3
checkpoint-1310/config.json +0 -71
checkpoint-1310/model.safetensors +0 -3
checkpoint-1310/optimizer.pt +0 -3
checkpoint-1310/rng_state.pth +0 -3
checkpoint-1310/scheduler.pt +0 -3
checkpoint-1310/trainer_state.json +0 -1080
checkpoint-1310/training_args.bin +0 -3

checkpoint-1179/config.json DELETED Viewed

@@ -1,71 +0,0 @@
-{
-  "architectures": [
-    "RobertaForSequenceClassification"
-  ],
-  "attention_probs_dropout_prob": 0.1,
-  "bos_token_id": 0,
-  "classifier_dropout": null,
-  "dtype": "float32",
-  "eos_token_id": 2,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
-  "id2label": {
-    "0": "add_contact",
-    "1": "edit_phone",
-    "2": "edit_email",
-    "3": "edit_address",
-    "4": "delete_contact",
-    "5": "list_all_contacts",
-    "6": "search_contacts",
-    "7": "add_birthday",
-    "8": "list_birthdays",
-    "9": "add_note",
-    "10": "edit_note",
-    "11": "delete_note",
-    "12": "show_notes",
-    "13": "add_note_tag",
-    "14": "remove_note_tag",
-    "15": "search_notes_text",
-    "16": "search_notes_by_tag",
-    "17": "help",
-    "18": "exit",
-    "19": "hello"
-  },
-  "initializer_range": 0.02,
-  "intermediate_size": 3072,
-  "label2id": {
-    "add_birthday": 7,
-    "add_contact": 0,
-    "add_note": 9,
-    "add_note_tag": 13,
-    "delete_contact": 4,
-    "delete_note": 11,
-    "edit_address": 3,
-    "edit_email": 2,
-    "edit_note": 10,
-    "edit_phone": 1,
-    "exit": 18,
-    "hello": 19,
-    "help": 17,
-    "list_all_contacts": 5,
-    "list_birthdays": 8,
-    "remove_note_tag": 14,
-    "search_contacts": 6,
-    "search_notes_by_tag": 16,
-    "search_notes_text": 15,
-    "show_notes": 12
-  },
-  "layer_norm_eps": 1e-05,
-  "max_position_embeddings": 514,
-  "model_type": "roberta",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "pad_token_id": 1,
-  "position_embedding_type": "absolute",
-  "problem_type": "single_label_classification",
-  "transformers_version": "4.57.0",
-  "type_vocab_size": 1,
-  "use_cache": true,
-  "vocab_size": 50265
-}

checkpoint-1179/model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:43c1a057215ca31f05511e2fca66fe21b08567f621d3cf68599c9f21c43b06a8
-size 498668192

checkpoint-1179/optimizer.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3fc3886f62b31f55c1d966ff304bb7f67061dbe462e66c4c9754390c06b30cbd
-size 997451019

checkpoint-1179/rng_state.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:bf9c3b20a0a343ce38e5a13fb76bf553acda38dfb893bf7123a2d6ccc5edc6d9
-size 14455

checkpoint-1179/scheduler.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7ec190660d99873e82bc04f22cd7f50d3496428963a842ad2ae1e9cd4a99a031
-size 1465

checkpoint-1179/trainer_state.json DELETED Viewed

@@ -1,970 +0,0 @@
-{
-  "best_global_step": 917,
-  "best_metric": 0.980806142034549,
-  "best_model_checkpoint": "models/intent_classifier/checkpoint-917",
-  "epoch": 9.0,
-  "eval_steps": 500,
-  "global_step": 1179,
-  "is_hyper_param_search": false,
-  "is_local_process_zero": true,
-  "is_world_process_zero": true,
-  "log_history": [
-    {
-      "epoch": 0.07633587786259542,
-      "grad_norm": 4.617219924926758,
-      "learning_rate": 1.8000000000000001e-06,
-      "loss": 3.0068,
-      "step": 10
-    },
-    {
-      "epoch": 0.15267175572519084,
-      "grad_norm": 2.9202663898468018,
-      "learning_rate": 3.8000000000000005e-06,
-      "loss": 2.9968,
-      "step": 20
-    },
-    {
-      "epoch": 0.22900763358778625,
-      "grad_norm": 2.677699327468872,
-      "learning_rate": 5.8e-06,
-      "loss": 2.9956,
-      "step": 30
-    },
-    {
-      "epoch": 0.3053435114503817,
-      "grad_norm": 3.113600254058838,
-      "learning_rate": 7.800000000000002e-06,
-      "loss": 3.0013,
-      "step": 40
-    },
-    {
-      "epoch": 0.3816793893129771,
-      "grad_norm": 3.7662277221679688,
-      "learning_rate": 9.800000000000001e-06,
-      "loss": 2.9809,
-      "step": 50
-    },
-    {
-      "epoch": 0.4580152671755725,
-      "grad_norm": 8.282344818115234,
-      "learning_rate": 1.18e-05,
-      "loss": 2.9022,
-      "step": 60
-    },
-    {
-      "epoch": 0.5343511450381679,
-      "grad_norm": 8.860713005065918,
-      "learning_rate": 1.38e-05,
-      "loss": 2.7668,
-      "step": 70
-    },
-    {
-      "epoch": 0.6106870229007634,
-      "grad_norm": 11.435643196105957,
-      "learning_rate": 1.58e-05,
-      "loss": 2.5216,
-      "step": 80
-    },
-    {
-      "epoch": 0.6870229007633588,
-      "grad_norm": 10.48116397857666,
-      "learning_rate": 1.7800000000000002e-05,
-      "loss": 2.3281,
-      "step": 90
-    },
-    {
-      "epoch": 0.7633587786259542,
-      "grad_norm": 12.855015754699707,
-      "learning_rate": 1.98e-05,
-      "loss": 2.0133,
-      "step": 100
-    },
-    {
-      "epoch": 0.8396946564885496,
-      "grad_norm": 12.89151668548584,
-      "learning_rate": 1.985123966942149e-05,
-      "loss": 1.7684,
-      "step": 110
-    },
-    {
-      "epoch": 0.916030534351145,
-      "grad_norm": 11.234882354736328,
-      "learning_rate": 1.9685950413223144e-05,
-      "loss": 1.4861,
-      "step": 120
-    },
-    {
-      "epoch": 0.9923664122137404,
-      "grad_norm": 12.167614936828613,
-      "learning_rate": 1.9520661157024795e-05,
-      "loss": 1.2402,
-      "step": 130
-    },
-    {
-      "epoch": 1.0,
-      "eval_accuracy": 0.8944337811900192,
-      "eval_f1": 0.8911749215123173,
-      "eval_loss": 0.9475375413894653,
-      "eval_precision": 0.912477309076372,
-      "eval_recall": 0.8944337811900192,
-      "eval_runtime": 3.1553,
-      "eval_samples_per_second": 165.121,
-      "eval_steps_per_second": 10.459,
-      "step": 131
-    },
-    {
-      "epoch": 1.0687022900763359,
-      "grad_norm": 9.206323623657227,
-      "learning_rate": 1.9355371900826446e-05,
-      "loss": 1.0125,
-      "step": 140
-    },
-    {
-      "epoch": 1.1450381679389312,
-      "grad_norm": 8.127516746520996,
-      "learning_rate": 1.91900826446281e-05,
-      "loss": 0.8771,
-      "step": 150
-    },
-    {
-      "epoch": 1.2213740458015268,
-      "grad_norm": 13.526582717895508,
-      "learning_rate": 1.9024793388429755e-05,
-      "loss": 0.7546,
-      "step": 160
-    },
-    {
-      "epoch": 1.297709923664122,
-      "grad_norm": 9.502181053161621,
-      "learning_rate": 1.8859504132231407e-05,
-      "loss": 0.7518,
-      "step": 170
-    },
-    {
-      "epoch": 1.3740458015267176,
-      "grad_norm": 4.78341007232666,
-      "learning_rate": 1.8694214876033058e-05,
-      "loss": 0.64,
-      "step": 180
-    },
-    {
-      "epoch": 1.450381679389313,
-      "grad_norm": 9.189094543457031,
-      "learning_rate": 1.8528925619834712e-05,
-      "loss": 0.497,
-      "step": 190
-    },
-    {
-      "epoch": 1.5267175572519083,
-      "grad_norm": 14.268597602844238,
-      "learning_rate": 1.8363636363636367e-05,
-      "loss": 0.4995,
-      "step": 200
-    },
-    {
-      "epoch": 1.6030534351145038,
-      "grad_norm": 9.649062156677246,
-      "learning_rate": 1.819834710743802e-05,
-      "loss": 0.4439,
-      "step": 210
-    },
-    {
-      "epoch": 1.6793893129770994,
-      "grad_norm": 20.48824119567871,
-      "learning_rate": 1.803305785123967e-05,
-      "loss": 0.498,
-      "step": 220
-    },
-    {
-      "epoch": 1.7557251908396947,
-      "grad_norm": 12.906113624572754,
-      "learning_rate": 1.7867768595041324e-05,
-      "loss": 0.4111,
-      "step": 230
-    },
-    {
-      "epoch": 1.83206106870229,
-      "grad_norm": 18.856950759887695,
-      "learning_rate": 1.770247933884298e-05,
-      "loss": 0.3567,
-      "step": 240
-    },
-    {
-      "epoch": 1.9083969465648853,
-      "grad_norm": 5.464386463165283,
-      "learning_rate": 1.753719008264463e-05,
-      "loss": 0.3243,
-      "step": 250
-    },
-    {
-      "epoch": 1.984732824427481,
-      "grad_norm": 5.167541027069092,
-      "learning_rate": 1.737190082644628e-05,
-      "loss": 0.2887,
-      "step": 260
-    },
-    {
-      "epoch": 2.0,
-      "eval_accuracy": 0.9750479846449136,
-      "eval_f1": 0.9749686805377397,
-      "eval_loss": 0.2119528353214264,
-      "eval_precision": 0.976201785633538,
-      "eval_recall": 0.9750479846449136,
-      "eval_runtime": 3.0517,
-      "eval_samples_per_second": 170.723,
-      "eval_steps_per_second": 10.814,
-      "step": 262
-    },
-    {
-      "epoch": 2.0610687022900764,
-      "grad_norm": 2.6736671924591064,
-      "learning_rate": 1.7206611570247936e-05,
-      "loss": 0.1954,
-      "step": 270
-    },
-    {
-      "epoch": 2.1374045801526718,
-      "grad_norm": 5.401017665863037,
-      "learning_rate": 1.7041322314049587e-05,
-      "loss": 0.2031,
-      "step": 280
-    },
-    {
-      "epoch": 2.213740458015267,
-      "grad_norm": 13.930877685546875,
-      "learning_rate": 1.687603305785124e-05,
-      "loss": 0.1799,
-      "step": 290
-    },
-    {
-      "epoch": 2.2900763358778624,
-      "grad_norm": 6.306116104125977,
-      "learning_rate": 1.6710743801652893e-05,
-      "loss": 0.1459,
-      "step": 300
-    },
-    {
-      "epoch": 2.366412213740458,
-      "grad_norm": 2.7880892753601074,
-      "learning_rate": 1.6545454545454548e-05,
-      "loss": 0.1426,
-      "step": 310
-    },
-    {
-      "epoch": 2.4427480916030535,
-      "grad_norm": 15.64450740814209,
-      "learning_rate": 1.63801652892562e-05,
-      "loss": 0.1184,
-      "step": 320
-    },
-    {
-      "epoch": 2.519083969465649,
-      "grad_norm": 1.4264142513275146,
-      "learning_rate": 1.6214876033057853e-05,
-      "loss": 0.1372,
-      "step": 330
-    },
-    {
-      "epoch": 2.595419847328244,
-      "grad_norm": 14.163614273071289,
-      "learning_rate": 1.6049586776859505e-05,
-      "loss": 0.145,
-      "step": 340
-    },
-    {
-      "epoch": 2.67175572519084,
-      "grad_norm": 5.825468063354492,
-      "learning_rate": 1.588429752066116e-05,
-      "loss": 0.1137,
-      "step": 350
-    },
-    {
-      "epoch": 2.7480916030534353,
-      "grad_norm": 0.70721435546875,
-      "learning_rate": 1.571900826446281e-05,
-      "loss": 0.0688,
-      "step": 360
-    },
-    {
-      "epoch": 2.8244274809160306,
-      "grad_norm": 5.984133720397949,
-      "learning_rate": 1.5553719008264465e-05,
-      "loss": 0.1614,
-      "step": 370
-    },
-    {
-      "epoch": 2.900763358778626,
-      "grad_norm": 2.9067797660827637,
-      "learning_rate": 1.5388429752066116e-05,
-      "loss": 0.1258,
-      "step": 380
-    },
-    {
-      "epoch": 2.9770992366412212,
-      "grad_norm": 0.7466038465499878,
-      "learning_rate": 1.522314049586777e-05,
-      "loss": 0.1494,
-      "step": 390
-    },
-    {
-      "epoch": 3.0,
-      "eval_accuracy": 0.9692898272552783,
-      "eval_f1": 0.9692039233298336,
-      "eval_loss": 0.14140835404396057,
-      "eval_precision": 0.9706301207884909,
-      "eval_recall": 0.9692898272552783,
-      "eval_runtime": 2.9931,
-      "eval_samples_per_second": 174.066,
-      "eval_steps_per_second": 11.025,
-      "step": 393
-    },
-    {
-      "epoch": 3.053435114503817,
-      "grad_norm": 5.72157621383667,
-      "learning_rate": 1.5057851239669424e-05,
-      "loss": 0.1056,
-      "step": 400
-    },
-    {
-      "epoch": 3.1297709923664123,
-      "grad_norm": 2.024094343185425,
-      "learning_rate": 1.4892561983471077e-05,
-      "loss": 0.1402,
-      "step": 410
-    },
-    {
-      "epoch": 3.2061068702290076,
-      "grad_norm": 1.147175669670105,
-      "learning_rate": 1.4727272727272728e-05,
-      "loss": 0.0474,
-      "step": 420
-    },
-    {
-      "epoch": 3.282442748091603,
-      "grad_norm": 4.005652904510498,
-      "learning_rate": 1.4561983471074381e-05,
-      "loss": 0.076,
-      "step": 430
-    },
-    {
-      "epoch": 3.3587786259541983,
-      "grad_norm": 0.9247184991836548,
-      "learning_rate": 1.4396694214876035e-05,
-      "loss": 0.0709,
-      "step": 440
-    },
-    {
-      "epoch": 3.435114503816794,
-      "grad_norm": 4.577192306518555,
-      "learning_rate": 1.4231404958677688e-05,
-      "loss": 0.0678,
-      "step": 450
-    },
-    {
-      "epoch": 3.5114503816793894,
-      "grad_norm": 0.21287904679775238,
-      "learning_rate": 1.406611570247934e-05,
-      "loss": 0.0404,
-      "step": 460
-    },
-    {
-      "epoch": 3.5877862595419847,
-      "grad_norm": 0.67902010679245,
-      "learning_rate": 1.3900826446280993e-05,
-      "loss": 0.0508,
-      "step": 470
-    },
-    {
-      "epoch": 3.66412213740458,
-      "grad_norm": 0.8000791072845459,
-      "learning_rate": 1.3735537190082645e-05,
-      "loss": 0.0473,
-      "step": 480
-    },
-    {
-      "epoch": 3.7404580152671754,
-      "grad_norm": 1.3421847820281982,
-      "learning_rate": 1.35702479338843e-05,
-      "loss": 0.0223,
-      "step": 490
-    },
-    {
-      "epoch": 3.816793893129771,
-      "grad_norm": 0.182773157954216,
-      "learning_rate": 1.3404958677685951e-05,
-      "loss": 0.0196,
-      "step": 500
-    },
-    {
-      "epoch": 3.8931297709923665,
-      "grad_norm": 1.4306972026824951,
-      "learning_rate": 1.3239669421487604e-05,
-      "loss": 0.0205,
-      "step": 510
-    },
-    {
-      "epoch": 3.969465648854962,
-      "grad_norm": 1.896088719367981,
-      "learning_rate": 1.3074380165289257e-05,
-      "loss": 0.0212,
-      "step": 520
-    },
-    {
-      "epoch": 4.0,
-      "eval_accuracy": 0.9769673704414588,
-      "eval_f1": 0.9769688162747627,
-      "eval_loss": 0.12679165601730347,
-      "eval_precision": 0.9782792844480811,
-      "eval_recall": 0.9769673704414588,
-      "eval_runtime": 3.1232,
-      "eval_samples_per_second": 166.818,
-      "eval_steps_per_second": 10.566,
-      "step": 524
-    },
-    {
-      "epoch": 4.0458015267175576,
-      "grad_norm": 0.14600762724876404,
-      "learning_rate": 1.2909090909090912e-05,
-      "loss": 0.0222,
-      "step": 530
-    },
-    {
-      "epoch": 4.122137404580153,
-      "grad_norm": 8.074915885925293,
-      "learning_rate": 1.2743801652892563e-05,
-      "loss": 0.0542,
-      "step": 540
-    },
-    {
-      "epoch": 4.198473282442748,
-      "grad_norm": 0.09765351563692093,
-      "learning_rate": 1.2578512396694216e-05,
-      "loss": 0.0394,
-      "step": 550
-    },
-    {
-      "epoch": 4.2748091603053435,
-      "grad_norm": 0.33209875226020813,
-      "learning_rate": 1.2413223140495869e-05,
-      "loss": 0.0241,
-      "step": 560
-    },
-    {
-      "epoch": 4.351145038167939,
-      "grad_norm": 0.5309058427810669,
-      "learning_rate": 1.2247933884297522e-05,
-      "loss": 0.0161,
-      "step": 570
-    },
-    {
-      "epoch": 4.427480916030534,
-      "grad_norm": 0.1629948765039444,
-      "learning_rate": 1.2082644628099173e-05,
-      "loss": 0.0129,
-      "step": 580
-    },
-    {
-      "epoch": 4.5038167938931295,
-      "grad_norm": 0.15240447223186493,
-      "learning_rate": 1.1917355371900828e-05,
-      "loss": 0.0128,
-      "step": 590
-    },
-    {
-      "epoch": 4.580152671755725,
-      "grad_norm": 0.10693137347698212,
-      "learning_rate": 1.175206611570248e-05,
-      "loss": 0.0724,
-      "step": 600
-    },
-    {
-      "epoch": 4.65648854961832,
-      "grad_norm": 0.8860049843788147,
-      "learning_rate": 1.1586776859504133e-05,
-      "loss": 0.013,
-      "step": 610
-    },
-    {
-      "epoch": 4.732824427480916,
-      "grad_norm": 1.1124643087387085,
-      "learning_rate": 1.1421487603305785e-05,
-      "loss": 0.0228,
-      "step": 620
-    },
-    {
-      "epoch": 4.809160305343512,
-      "grad_norm": 16.63216209411621,
-      "learning_rate": 1.125619834710744e-05,
-      "loss": 0.1361,
-      "step": 630
-    },
-    {
-      "epoch": 4.885496183206107,
-      "grad_norm": 0.22511304914951324,
-      "learning_rate": 1.1090909090909092e-05,
-      "loss": 0.0127,
-      "step": 640
-    },
-    {
-      "epoch": 4.961832061068702,
-      "grad_norm": 0.2706206142902374,
-      "learning_rate": 1.0925619834710745e-05,
-      "loss": 0.0127,
-      "step": 650
-    },
-    {
-      "epoch": 5.0,
-      "eval_accuracy": 0.9731285988483686,
-      "eval_f1": 0.973064976375106,
-      "eval_loss": 0.14903880655765533,
-      "eval_precision": 0.9743006090972162,
-      "eval_recall": 0.9731285988483686,
-      "eval_runtime": 3.1635,
-      "eval_samples_per_second": 164.69,
-      "eval_steps_per_second": 10.431,
-      "step": 655
-    },
-    {
-      "epoch": 5.038167938931298,
-      "grad_norm": 26.473268508911133,
-      "learning_rate": 1.0760330578512396e-05,
-      "loss": 0.0238,
-      "step": 660
-    },
-    {
-      "epoch": 5.114503816793893,
-      "grad_norm": 0.08209118992090225,
-      "learning_rate": 1.0595041322314051e-05,
-      "loss": 0.0107,
-      "step": 670
-    },
-    {
-      "epoch": 5.190839694656488,
-      "grad_norm": 1.1051641702651978,
-      "learning_rate": 1.0429752066115704e-05,
-      "loss": 0.0682,
-      "step": 680
-    },
-    {
-      "epoch": 5.267175572519084,
-      "grad_norm": 11.310916900634766,
-      "learning_rate": 1.0264462809917357e-05,
-      "loss": 0.0131,
-      "step": 690
-    },
-    {
-      "epoch": 5.34351145038168,
-      "grad_norm": 0.09134263545274734,
-      "learning_rate": 1.0099173553719008e-05,
-      "loss": 0.0428,
-      "step": 700
-    },
-    {
-      "epoch": 5.419847328244275,
-      "grad_norm": 0.08655811846256256,
-      "learning_rate": 9.933884297520661e-06,
-      "loss": 0.0246,
-      "step": 710
-    },
-    {
-      "epoch": 5.4961832061068705,
-      "grad_norm": 0.16410402953624725,
-      "learning_rate": 9.768595041322316e-06,
-      "loss": 0.0094,
-      "step": 720
-    },
-    {
-      "epoch": 5.572519083969466,
-      "grad_norm": 1.349546194076538,
-      "learning_rate": 9.603305785123967e-06,
-      "loss": 0.045,
-      "step": 730
-    },
-    {
-      "epoch": 5.648854961832061,
-      "grad_norm": 0.12257255613803864,
-      "learning_rate": 9.438016528925621e-06,
-      "loss": 0.0516,
-      "step": 740
-    },
-    {
-      "epoch": 5.7251908396946565,
-      "grad_norm": 0.06910885125398636,
-      "learning_rate": 9.272727272727273e-06,
-      "loss": 0.009,
-      "step": 750
-    },
-    {
-      "epoch": 5.801526717557252,
-      "grad_norm": 0.056132227182388306,
-      "learning_rate": 9.107438016528927e-06,
-      "loss": 0.0107,
-      "step": 760
-    },
-    {
-      "epoch": 5.877862595419847,
-      "grad_norm": 0.07667958736419678,
-      "learning_rate": 8.942148760330578e-06,
-      "loss": 0.0086,
-      "step": 770
-    },
-    {
-      "epoch": 5.9541984732824424,
-      "grad_norm": 0.10609736293554306,
-      "learning_rate": 8.776859504132233e-06,
-      "loss": 0.0085,
-      "step": 780
-    },
-    {
-      "epoch": 6.0,
-      "eval_accuracy": 0.9788867562380038,
-      "eval_f1": 0.9789720270641704,
-      "eval_loss": 0.12155096977949142,
-      "eval_precision": 0.9801130700504813,
-      "eval_recall": 0.9788867562380038,
-      "eval_runtime": 3.2955,
-      "eval_samples_per_second": 158.096,
-      "eval_steps_per_second": 10.014,
-      "step": 786
-    },
-    {
-      "epoch": 6.030534351145038,
-      "grad_norm": 0.06408526748418808,
-      "learning_rate": 8.611570247933884e-06,
-      "loss": 0.0081,
-      "step": 790
-    },
-    {
-      "epoch": 6.106870229007634,
-      "grad_norm": 0.07884930074214935,
-      "learning_rate": 8.446280991735539e-06,
-      "loss": 0.031,
-      "step": 800
-    },
-    {
-      "epoch": 6.183206106870229,
-      "grad_norm": 0.07998275011777878,
-      "learning_rate": 8.28099173553719e-06,
-      "loss": 0.0453,
-      "step": 810
-    },
-    {
-      "epoch": 6.259541984732825,
-      "grad_norm": 0.22578206658363342,
-      "learning_rate": 8.115702479338843e-06,
-      "loss": 0.0078,
-      "step": 820
-    },
-    {
-      "epoch": 6.33587786259542,
-      "grad_norm": 0.07642875611782074,
-      "learning_rate": 7.950413223140496e-06,
-      "loss": 0.0086,
-      "step": 830
-    },
-    {
-      "epoch": 6.412213740458015,
-      "grad_norm": 0.10305721312761307,
-      "learning_rate": 7.785123966942149e-06,
-      "loss": 0.0444,
-      "step": 840
-    },
-    {
-      "epoch": 6.488549618320611,
-      "grad_norm": 0.0701122134923935,
-      "learning_rate": 7.619834710743802e-06,
-      "loss": 0.0077,
-      "step": 850
-    },
-    {
-      "epoch": 6.564885496183206,
-      "grad_norm": 0.07119292765855789,
-      "learning_rate": 7.454545454545456e-06,
-      "loss": 0.0076,
-      "step": 860
-    },
-    {
-      "epoch": 6.641221374045801,
-      "grad_norm": 0.2685672342777252,
-      "learning_rate": 7.289256198347108e-06,
-      "loss": 0.0077,
-      "step": 870
-    },
-    {
-      "epoch": 6.717557251908397,
-      "grad_norm": 0.0628926083445549,
-      "learning_rate": 7.1239669421487615e-06,
-      "loss": 0.0072,
-      "step": 880
-    },
-    {
-      "epoch": 6.793893129770993,
-      "grad_norm": 0.06299301236867905,
-      "learning_rate": 6.9586776859504135e-06,
-      "loss": 0.0109,
-      "step": 890
-    },
-    {
-      "epoch": 6.870229007633588,
-      "grad_norm": 0.06120818480849266,
-      "learning_rate": 6.793388429752067e-06,
-      "loss": 0.0069,
-      "step": 900
-    },
-    {
-      "epoch": 6.9465648854961835,
-      "grad_norm": 0.08700945228338242,
-      "learning_rate": 6.628099173553719e-06,
-      "loss": 0.0073,
-      "step": 910
-    },
-    {
-      "epoch": 7.0,
-      "eval_accuracy": 0.980806142034549,
-      "eval_f1": 0.980791368968265,
-      "eval_loss": 0.11875477433204651,
-      "eval_precision": 0.981936841149893,
-      "eval_recall": 0.980806142034549,
-      "eval_runtime": 3.1069,
-      "eval_samples_per_second": 167.692,
-      "eval_steps_per_second": 10.622,
-      "step": 917
-    },
-    {
-      "epoch": 7.022900763358779,
-      "grad_norm": 0.061511170119047165,
-      "learning_rate": 6.462809917355372e-06,
-      "loss": 0.0066,
-      "step": 920
-    },
-    {
-      "epoch": 7.099236641221374,
-      "grad_norm": 0.06128810718655586,
-      "learning_rate": 6.297520661157025e-06,
-      "loss": 0.0064,
-      "step": 930
-    },
-    {
-      "epoch": 7.175572519083969,
-      "grad_norm": 0.05454257130622864,
-      "learning_rate": 6.132231404958678e-06,
-      "loss": 0.0066,
-      "step": 940
-    },
-    {
-      "epoch": 7.251908396946565,
-      "grad_norm": 0.09356739372015,
-      "learning_rate": 5.966942148760331e-06,
-      "loss": 0.0065,
-      "step": 950
-    },
-    {
-      "epoch": 7.32824427480916,
-      "grad_norm": 0.04699549078941345,
-      "learning_rate": 5.801652892561984e-06,
-      "loss": 0.006,
-      "step": 960
-    },
-    {
-      "epoch": 7.404580152671755,
-      "grad_norm": 0.04597270488739014,
-      "learning_rate": 5.636363636363636e-06,
-      "loss": 0.0063,
-      "step": 970
-    },
-    {
-      "epoch": 7.480916030534351,
-      "grad_norm": 0.05777190253138542,
-      "learning_rate": 5.47107438016529e-06,
-      "loss": 0.0057,
-      "step": 980
-    },
-    {
-      "epoch": 7.557251908396947,
-      "grad_norm": 0.0520237572491169,
-      "learning_rate": 5.305785123966942e-06,
-      "loss": 0.006,
-      "step": 990
-    },
-    {
-      "epoch": 7.633587786259542,
-      "grad_norm": 0.0427822545170784,
-      "learning_rate": 5.140495867768596e-06,
-      "loss": 0.0059,
-      "step": 1000
-    },
-    {
-      "epoch": 7.709923664122138,
-      "grad_norm": 0.05699237063527107,
-      "learning_rate": 4.975206611570249e-06,
-      "loss": 0.0055,
-      "step": 1010
-    },
-    {
-      "epoch": 7.786259541984733,
-      "grad_norm": 0.05885695666074753,
-      "learning_rate": 4.8099173553719015e-06,
-      "loss": 0.0258,
-      "step": 1020
-    },
-    {
-      "epoch": 7.862595419847328,
-      "grad_norm": 0.05190462991595268,
-      "learning_rate": 4.6446280991735544e-06,
-      "loss": 0.0496,
-      "step": 1030
-    },
-    {
-      "epoch": 7.938931297709924,
-      "grad_norm": 0.03909669816493988,
-      "learning_rate": 4.479338842975207e-06,
-      "loss": 0.0398,
-      "step": 1040
-    },
-    {
-      "epoch": 8.0,
-      "eval_accuracy": 0.980806142034549,
-      "eval_f1": 0.980791368968265,
-      "eval_loss": 0.12089628726243973,
-      "eval_precision": 0.981936841149893,
-      "eval_recall": 0.980806142034549,
-      "eval_runtime": 3.1129,
-      "eval_samples_per_second": 167.366,
-      "eval_steps_per_second": 10.601,
-      "step": 1048
-    },
-    {
-      "epoch": 8.01526717557252,
-      "grad_norm": 0.05181822180747986,
-      "learning_rate": 4.31404958677686e-06,
-      "loss": 0.0062,
-      "step": 1050
-    },
-    {
-      "epoch": 8.091603053435115,
-      "grad_norm": 0.03777517005801201,
-      "learning_rate": 4.148760330578513e-06,
-      "loss": 0.0058,
-      "step": 1060
-    },
-    {
-      "epoch": 8.16793893129771,
-      "grad_norm": 0.04515732452273369,
-      "learning_rate": 3.983471074380166e-06,
-      "loss": 0.0056,
-      "step": 1070
-    },
-    {
-      "epoch": 8.244274809160306,
-      "grad_norm": 0.044928282499313354,
-      "learning_rate": 3.818181818181819e-06,
-      "loss": 0.0055,
-      "step": 1080
-    },
-    {
-      "epoch": 8.320610687022901,
-      "grad_norm": 0.05599347501993179,
-      "learning_rate": 3.6528925619834715e-06,
-      "loss": 0.0057,
-      "step": 1090
-    },
-    {
-      "epoch": 8.396946564885496,
-      "grad_norm": 1.0466651916503906,
-      "learning_rate": 3.4876033057851245e-06,
-      "loss": 0.0384,
-      "step": 1100
-    },
-    {
-      "epoch": 8.473282442748092,
-      "grad_norm": 0.05839056894183159,
-      "learning_rate": 3.3223140495867774e-06,
-      "loss": 0.0057,
-      "step": 1110
-    },
-    {
-      "epoch": 8.549618320610687,
-      "grad_norm": 0.05969908460974693,
-      "learning_rate": 3.1570247933884303e-06,
-      "loss": 0.0424,
-      "step": 1120
-    },
-    {
-      "epoch": 8.625954198473282,
-      "grad_norm": 0.06252706795930862,
-      "learning_rate": 2.9917355371900832e-06,
-      "loss": 0.0174,
-      "step": 1130
-    },
-    {
-      "epoch": 8.702290076335878,
-      "grad_norm": 0.1538064330816269,
-      "learning_rate": 2.8264462809917357e-06,
-      "loss": 0.0058,
-      "step": 1140
-    },
-    {
-      "epoch": 8.778625954198473,
-      "grad_norm": 0.05743182823061943,
-      "learning_rate": 2.6611570247933886e-06,
-      "loss": 0.0055,
-      "step": 1150
-    },
-    {
-      "epoch": 8.854961832061068,
-      "grad_norm": 0.06665431708097458,
-      "learning_rate": 2.4958677685950416e-06,
-      "loss": 0.0057,
-      "step": 1160
-    },
-    {
-      "epoch": 8.931297709923664,
-      "grad_norm": 0.07899218052625656,
-      "learning_rate": 2.3305785123966945e-06,
-      "loss": 0.0055,
-      "step": 1170
-    },
-    {
-      "epoch": 9.0,
-      "eval_accuracy": 0.980806142034549,
-      "eval_f1": 0.9807999304274743,
-      "eval_loss": 0.12244618684053421,
-      "eval_precision": 0.9819563131797131,
-      "eval_recall": 0.980806142034549,
-      "eval_runtime": 3.1404,
-      "eval_samples_per_second": 165.902,
-      "eval_steps_per_second": 10.508,
-      "step": 1179
-    }
-  ],
-  "logging_steps": 10,
-  "max_steps": 1310,
-  "num_input_tokens_seen": 0,
-  "num_train_epochs": 10,
-  "save_steps": 500,
-  "stateful_callbacks": {
-    "EarlyStoppingCallback": {
-      "args": {
-        "early_stopping_patience": 3,
-        "early_stopping_threshold": 0.0
-      },
-      "attributes": {
-        "early_stopping_patience_counter": 2
-      }
-    },
-    "TrainerControl": {
-      "args": {
-        "should_epoch_stop": false,
-        "should_evaluate": false,
-        "should_log": false,
-        "should_save": true,
-        "should_training_stop": false
-      },
-      "attributes": {}
-    }
-  },
-  "total_flos": 1233335031616512.0,
-  "train_batch_size": 16,
-  "trial_name": null,
-  "trial_params": null
-}

checkpoint-1179/training_args.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:21a4ac2a72f23cb69080a4fb3a9a3266e6a76062c2c55904cad3d4237f62c83e
-size 5841

checkpoint-1310/config.json DELETED Viewed

@@ -1,71 +0,0 @@
-{
-  "architectures": [
-    "RobertaForSequenceClassification"
-  ],
-  "attention_probs_dropout_prob": 0.1,
-  "bos_token_id": 0,
-  "classifier_dropout": null,
-  "dtype": "float32",
-  "eos_token_id": 2,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
-  "id2label": {
-    "0": "add_contact",
-    "1": "edit_phone",
-    "2": "edit_email",
-    "3": "edit_address",
-    "4": "delete_contact",
-    "5": "list_all_contacts",
-    "6": "search_contacts",
-    "7": "add_birthday",
-    "8": "list_birthdays",
-    "9": "add_note",
-    "10": "edit_note",
-    "11": "delete_note",
-    "12": "show_notes",
-    "13": "add_note_tag",
-    "14": "remove_note_tag",
-    "15": "search_notes_text",
-    "16": "search_notes_by_tag",
-    "17": "help",
-    "18": "exit",
-    "19": "hello"
-  },
-  "initializer_range": 0.02,
-  "intermediate_size": 3072,
-  "label2id": {
-    "add_birthday": 7,
-    "add_contact": 0,
-    "add_note": 9,
-    "add_note_tag": 13,
-    "delete_contact": 4,
-    "delete_note": 11,
-    "edit_address": 3,
-    "edit_email": 2,
-    "edit_note": 10,
-    "edit_phone": 1,
-    "exit": 18,
-    "hello": 19,
-    "help": 17,
-    "list_all_contacts": 5,
-    "list_birthdays": 8,
-    "remove_note_tag": 14,
-    "search_contacts": 6,
-    "search_notes_by_tag": 16,
-    "search_notes_text": 15,
-    "show_notes": 12
-  },
-  "layer_norm_eps": 1e-05,
-  "max_position_embeddings": 514,
-  "model_type": "roberta",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "pad_token_id": 1,
-  "position_embedding_type": "absolute",
-  "problem_type": "single_label_classification",
-  "transformers_version": "4.57.0",
-  "type_vocab_size": 1,
-  "use_cache": true,
-  "vocab_size": 50265
-}

checkpoint-1310/model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c4a7120d267e8b923b5542c1ea6aeba65facf08182ea362827113fe36481f4e4
-size 498668192

checkpoint-1310/optimizer.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6450cd41a91b312e87c4978cd7fdaee344ccafc5b63f22d076fc1187307eaf9a
-size 997451019

checkpoint-1310/rng_state.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:791590502d32babfec3e01cad84acac1a5c5f69449f6851db53f4aead2041f79
-size 14455

checkpoint-1310/scheduler.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ef479fcc9aa8f88124c5b577e46a56b83c9f1415e04b0e4ef3b46ce53586f6bb
-size 1465

checkpoint-1310/trainer_state.json DELETED Viewed

@@ -1,1080 +0,0 @@
-{
-  "best_global_step": 1310,
-  "best_metric": 0.982725527831094,
-  "best_model_checkpoint": "models/intent_classifier/checkpoint-1310",
-  "epoch": 10.0,
-  "eval_steps": 500,
-  "global_step": 1310,
-  "is_hyper_param_search": false,
-  "is_local_process_zero": true,
-  "is_world_process_zero": true,
-  "log_history": [
-    {
-      "epoch": 0.07633587786259542,
-      "grad_norm": 4.617219924926758,
-      "learning_rate": 1.8000000000000001e-06,
-      "loss": 3.0068,
-      "step": 10
-    },
-    {
-      "epoch": 0.15267175572519084,
-      "grad_norm": 2.9202663898468018,
-      "learning_rate": 3.8000000000000005e-06,
-      "loss": 2.9968,
-      "step": 20
-    },
-    {
-      "epoch": 0.22900763358778625,
-      "grad_norm": 2.677699327468872,
-      "learning_rate": 5.8e-06,
-      "loss": 2.9956,
-      "step": 30
-    },
-    {
-      "epoch": 0.3053435114503817,
-      "grad_norm": 3.113600254058838,
-      "learning_rate": 7.800000000000002e-06,
-      "loss": 3.0013,
-      "step": 40
-    },
-    {
-      "epoch": 0.3816793893129771,
-      "grad_norm": 3.7662277221679688,
-      "learning_rate": 9.800000000000001e-06,
-      "loss": 2.9809,
-      "step": 50
-    },
-    {
-      "epoch": 0.4580152671755725,
-      "grad_norm": 8.282344818115234,
-      "learning_rate": 1.18e-05,
-      "loss": 2.9022,
-      "step": 60
-    },
-    {
-      "epoch": 0.5343511450381679,
-      "grad_norm": 8.860713005065918,
-      "learning_rate": 1.38e-05,
-      "loss": 2.7668,
-      "step": 70
-    },
-    {
-      "epoch": 0.6106870229007634,
-      "grad_norm": 11.435643196105957,
-      "learning_rate": 1.58e-05,
-      "loss": 2.5216,
-      "step": 80
-    },
-    {
-      "epoch": 0.6870229007633588,
-      "grad_norm": 10.48116397857666,
-      "learning_rate": 1.7800000000000002e-05,
-      "loss": 2.3281,
-      "step": 90
-    },
-    {
-      "epoch": 0.7633587786259542,
-      "grad_norm": 12.855015754699707,
-      "learning_rate": 1.98e-05,
-      "loss": 2.0133,
-      "step": 100
-    },
-    {
-      "epoch": 0.8396946564885496,
-      "grad_norm": 12.89151668548584,
-      "learning_rate": 1.985123966942149e-05,
-      "loss": 1.7684,
-      "step": 110
-    },
-    {
-      "epoch": 0.916030534351145,
-      "grad_norm": 11.234882354736328,
-      "learning_rate": 1.9685950413223144e-05,
-      "loss": 1.4861,
-      "step": 120
-    },
-    {
-      "epoch": 0.9923664122137404,
-      "grad_norm": 12.167614936828613,
-      "learning_rate": 1.9520661157024795e-05,
-      "loss": 1.2402,
-      "step": 130
-    },
-    {
-      "epoch": 1.0,
-      "eval_accuracy": 0.8944337811900192,
-      "eval_f1": 0.8911749215123173,
-      "eval_loss": 0.9475375413894653,
-      "eval_precision": 0.912477309076372,
-      "eval_recall": 0.8944337811900192,
-      "eval_runtime": 3.1553,
-      "eval_samples_per_second": 165.121,
-      "eval_steps_per_second": 10.459,
-      "step": 131
-    },
-    {
-      "epoch": 1.0687022900763359,
-      "grad_norm": 9.206323623657227,
-      "learning_rate": 1.9355371900826446e-05,
-      "loss": 1.0125,
-      "step": 140
-    },
-    {
-      "epoch": 1.1450381679389312,
-      "grad_norm": 8.127516746520996,
-      "learning_rate": 1.91900826446281e-05,
-      "loss": 0.8771,
-      "step": 150
-    },
-    {
-      "epoch": 1.2213740458015268,
-      "grad_norm": 13.526582717895508,
-      "learning_rate": 1.9024793388429755e-05,
-      "loss": 0.7546,
-      "step": 160
-    },
-    {
-      "epoch": 1.297709923664122,
-      "grad_norm": 9.502181053161621,
-      "learning_rate": 1.8859504132231407e-05,
-      "loss": 0.7518,
-      "step": 170
-    },
-    {
-      "epoch": 1.3740458015267176,
-      "grad_norm": 4.78341007232666,
-      "learning_rate": 1.8694214876033058e-05,
-      "loss": 0.64,
-      "step": 180
-    },
-    {
-      "epoch": 1.450381679389313,
-      "grad_norm": 9.189094543457031,
-      "learning_rate": 1.8528925619834712e-05,
-      "loss": 0.497,
-      "step": 190
-    },
-    {
-      "epoch": 1.5267175572519083,
-      "grad_norm": 14.268597602844238,
-      "learning_rate": 1.8363636363636367e-05,
-      "loss": 0.4995,
-      "step": 200
-    },
-    {
-      "epoch": 1.6030534351145038,
-      "grad_norm": 9.649062156677246,
-      "learning_rate": 1.819834710743802e-05,
-      "loss": 0.4439,
-      "step": 210
-    },
-    {
-      "epoch": 1.6793893129770994,
-      "grad_norm": 20.48824119567871,
-      "learning_rate": 1.803305785123967e-05,
-      "loss": 0.498,
-      "step": 220
-    },
-    {
-      "epoch": 1.7557251908396947,
-      "grad_norm": 12.906113624572754,
-      "learning_rate": 1.7867768595041324e-05,
-      "loss": 0.4111,
-      "step": 230
-    },
-    {
-      "epoch": 1.83206106870229,
-      "grad_norm": 18.856950759887695,
-      "learning_rate": 1.770247933884298e-05,
-      "loss": 0.3567,
-      "step": 240
-    },
-    {
-      "epoch": 1.9083969465648853,
-      "grad_norm": 5.464386463165283,
-      "learning_rate": 1.753719008264463e-05,
-      "loss": 0.3243,
-      "step": 250
-    },
-    {
-      "epoch": 1.984732824427481,
-      "grad_norm": 5.167541027069092,
-      "learning_rate": 1.737190082644628e-05,
-      "loss": 0.2887,
-      "step": 260
-    },
-    {
-      "epoch": 2.0,
-      "eval_accuracy": 0.9750479846449136,
-      "eval_f1": 0.9749686805377397,
-      "eval_loss": 0.2119528353214264,
-      "eval_precision": 0.976201785633538,
-      "eval_recall": 0.9750479846449136,
-      "eval_runtime": 3.0517,
-      "eval_samples_per_second": 170.723,
-      "eval_steps_per_second": 10.814,
-      "step": 262
-    },
-    {
-      "epoch": 2.0610687022900764,
-      "grad_norm": 2.6736671924591064,
-      "learning_rate": 1.7206611570247936e-05,
-      "loss": 0.1954,
-      "step": 270
-    },
-    {
-      "epoch": 2.1374045801526718,
-      "grad_norm": 5.401017665863037,
-      "learning_rate": 1.7041322314049587e-05,
-      "loss": 0.2031,
-      "step": 280
-    },
-    {
-      "epoch": 2.213740458015267,
-      "grad_norm": 13.930877685546875,
-      "learning_rate": 1.687603305785124e-05,
-      "loss": 0.1799,
-      "step": 290
-    },
-    {
-      "epoch": 2.2900763358778624,
-      "grad_norm": 6.306116104125977,
-      "learning_rate": 1.6710743801652893e-05,
-      "loss": 0.1459,
-      "step": 300
-    },
-    {
-      "epoch": 2.366412213740458,
-      "grad_norm": 2.7880892753601074,
-      "learning_rate": 1.6545454545454548e-05,
-      "loss": 0.1426,
-      "step": 310
-    },
-    {
-      "epoch": 2.4427480916030535,
-      "grad_norm": 15.64450740814209,
-      "learning_rate": 1.63801652892562e-05,
-      "loss": 0.1184,
-      "step": 320
-    },
-    {
-      "epoch": 2.519083969465649,
-      "grad_norm": 1.4264142513275146,
-      "learning_rate": 1.6214876033057853e-05,
-      "loss": 0.1372,
-      "step": 330
-    },
-    {
-      "epoch": 2.595419847328244,
-      "grad_norm": 14.163614273071289,
-      "learning_rate": 1.6049586776859505e-05,
-      "loss": 0.145,
-      "step": 340
-    },
-    {
-      "epoch": 2.67175572519084,
-      "grad_norm": 5.825468063354492,
-      "learning_rate": 1.588429752066116e-05,
-      "loss": 0.1137,
-      "step": 350
-    },
-    {
-      "epoch": 2.7480916030534353,
-      "grad_norm": 0.70721435546875,
-      "learning_rate": 1.571900826446281e-05,
-      "loss": 0.0688,
-      "step": 360
-    },
-    {
-      "epoch": 2.8244274809160306,
-      "grad_norm": 5.984133720397949,
-      "learning_rate": 1.5553719008264465e-05,
-      "loss": 0.1614,
-      "step": 370
-    },
-    {
-      "epoch": 2.900763358778626,
-      "grad_norm": 2.9067797660827637,
-      "learning_rate": 1.5388429752066116e-05,
-      "loss": 0.1258,
-      "step": 380
-    },
-    {
-      "epoch": 2.9770992366412212,
-      "grad_norm": 0.7466038465499878,
-      "learning_rate": 1.522314049586777e-05,
-      "loss": 0.1494,
-      "step": 390
-    },
-    {
-      "epoch": 3.0,
-      "eval_accuracy": 0.9692898272552783,
-      "eval_f1": 0.9692039233298336,
-      "eval_loss": 0.14140835404396057,
-      "eval_precision": 0.9706301207884909,
-      "eval_recall": 0.9692898272552783,
-      "eval_runtime": 2.9931,
-      "eval_samples_per_second": 174.066,
-      "eval_steps_per_second": 11.025,
-      "step": 393
-    },
-    {
-      "epoch": 3.053435114503817,
-      "grad_norm": 5.72157621383667,
-      "learning_rate": 1.5057851239669424e-05,
-      "loss": 0.1056,
-      "step": 400
-    },
-    {
-      "epoch": 3.1297709923664123,
-      "grad_norm": 2.024094343185425,
-      "learning_rate": 1.4892561983471077e-05,
-      "loss": 0.1402,
-      "step": 410
-    },
-    {
-      "epoch": 3.2061068702290076,
-      "grad_norm": 1.147175669670105,
-      "learning_rate": 1.4727272727272728e-05,
-      "loss": 0.0474,
-      "step": 420
-    },
-    {
-      "epoch": 3.282442748091603,
-      "grad_norm": 4.005652904510498,
-      "learning_rate": 1.4561983471074381e-05,
-      "loss": 0.076,
-      "step": 430
-    },
-    {
-      "epoch": 3.3587786259541983,
-      "grad_norm": 0.9247184991836548,
-      "learning_rate": 1.4396694214876035e-05,
-      "loss": 0.0709,
-      "step": 440
-    },
-    {
-      "epoch": 3.435114503816794,
-      "grad_norm": 4.577192306518555,
-      "learning_rate": 1.4231404958677688e-05,
-      "loss": 0.0678,
-      "step": 450
-    },
-    {
-      "epoch": 3.5114503816793894,
-      "grad_norm": 0.21287904679775238,
-      "learning_rate": 1.406611570247934e-05,
-      "loss": 0.0404,
-      "step": 460
-    },
-    {
-      "epoch": 3.5877862595419847,
-      "grad_norm": 0.67902010679245,
-      "learning_rate": 1.3900826446280993e-05,
-      "loss": 0.0508,
-      "step": 470
-    },
-    {
-      "epoch": 3.66412213740458,
-      "grad_norm": 0.8000791072845459,
-      "learning_rate": 1.3735537190082645e-05,
-      "loss": 0.0473,
-      "step": 480
-    },
-    {
-      "epoch": 3.7404580152671754,
-      "grad_norm": 1.3421847820281982,
-      "learning_rate": 1.35702479338843e-05,
-      "loss": 0.0223,
-      "step": 490
-    },
-    {
-      "epoch": 3.816793893129771,
-      "grad_norm": 0.182773157954216,
-      "learning_rate": 1.3404958677685951e-05,
-      "loss": 0.0196,
-      "step": 500
-    },
-    {
-      "epoch": 3.8931297709923665,
-      "grad_norm": 1.4306972026824951,
-      "learning_rate": 1.3239669421487604e-05,
-      "loss": 0.0205,
-      "step": 510
-    },
-    {
-      "epoch": 3.969465648854962,
-      "grad_norm": 1.896088719367981,
-      "learning_rate": 1.3074380165289257e-05,
-      "loss": 0.0212,
-      "step": 520
-    },
-    {
-      "epoch": 4.0,
-      "eval_accuracy": 0.9769673704414588,
-      "eval_f1": 0.9769688162747627,
-      "eval_loss": 0.12679165601730347,
-      "eval_precision": 0.9782792844480811,
-      "eval_recall": 0.9769673704414588,
-      "eval_runtime": 3.1232,
-      "eval_samples_per_second": 166.818,
-      "eval_steps_per_second": 10.566,
-      "step": 524
-    },
-    {
-      "epoch": 4.0458015267175576,
-      "grad_norm": 0.14600762724876404,
-      "learning_rate": 1.2909090909090912e-05,
-      "loss": 0.0222,
-      "step": 530
-    },
-    {
-      "epoch": 4.122137404580153,
-      "grad_norm": 8.074915885925293,
-      "learning_rate": 1.2743801652892563e-05,
-      "loss": 0.0542,
-      "step": 540
-    },
-    {
-      "epoch": 4.198473282442748,
-      "grad_norm": 0.09765351563692093,
-      "learning_rate": 1.2578512396694216e-05,
-      "loss": 0.0394,
-      "step": 550
-    },
-    {
-      "epoch": 4.2748091603053435,
-      "grad_norm": 0.33209875226020813,
-      "learning_rate": 1.2413223140495869e-05,
-      "loss": 0.0241,
-      "step": 560
-    },
-    {
-      "epoch": 4.351145038167939,
-      "grad_norm": 0.5309058427810669,
-      "learning_rate": 1.2247933884297522e-05,
-      "loss": 0.0161,
-      "step": 570
-    },
-    {
-      "epoch": 4.427480916030534,
-      "grad_norm": 0.1629948765039444,
-      "learning_rate": 1.2082644628099173e-05,
-      "loss": 0.0129,
-      "step": 580
-    },
-    {
-      "epoch": 4.5038167938931295,
-      "grad_norm": 0.15240447223186493,
-      "learning_rate": 1.1917355371900828e-05,
-      "loss": 0.0128,
-      "step": 590
-    },
-    {
-      "epoch": 4.580152671755725,
-      "grad_norm": 0.10693137347698212,
-      "learning_rate": 1.175206611570248e-05,
-      "loss": 0.0724,
-      "step": 600
-    },
-    {
-      "epoch": 4.65648854961832,
-      "grad_norm": 0.8860049843788147,
-      "learning_rate": 1.1586776859504133e-05,
-      "loss": 0.013,
-      "step": 610
-    },
-    {
-      "epoch": 4.732824427480916,
-      "grad_norm": 1.1124643087387085,
-      "learning_rate": 1.1421487603305785e-05,
-      "loss": 0.0228,
-      "step": 620
-    },
-    {
-      "epoch": 4.809160305343512,
-      "grad_norm": 16.63216209411621,
-      "learning_rate": 1.125619834710744e-05,
-      "loss": 0.1361,
-      "step": 630
-    },
-    {
-      "epoch": 4.885496183206107,
-      "grad_norm": 0.22511304914951324,
-      "learning_rate": 1.1090909090909092e-05,
-      "loss": 0.0127,
-      "step": 640
-    },
-    {
-      "epoch": 4.961832061068702,
-      "grad_norm": 0.2706206142902374,
-      "learning_rate": 1.0925619834710745e-05,
-      "loss": 0.0127,
-      "step": 650
-    },
-    {
-      "epoch": 5.0,
-      "eval_accuracy": 0.9731285988483686,
-      "eval_f1": 0.973064976375106,
-      "eval_loss": 0.14903880655765533,
-      "eval_precision": 0.9743006090972162,
-      "eval_recall": 0.9731285988483686,
-      "eval_runtime": 3.1635,
-      "eval_samples_per_second": 164.69,
-      "eval_steps_per_second": 10.431,
-      "step": 655
-    },
-    {
-      "epoch": 5.038167938931298,
-      "grad_norm": 26.473268508911133,
-      "learning_rate": 1.0760330578512396e-05,
-      "loss": 0.0238,
-      "step": 660
-    },
-    {
-      "epoch": 5.114503816793893,
-      "grad_norm": 0.08209118992090225,
-      "learning_rate": 1.0595041322314051e-05,
-      "loss": 0.0107,
-      "step": 670
-    },
-    {
-      "epoch": 5.190839694656488,
-      "grad_norm": 1.1051641702651978,
-      "learning_rate": 1.0429752066115704e-05,
-      "loss": 0.0682,
-      "step": 680
-    },
-    {
-      "epoch": 5.267175572519084,
-      "grad_norm": 11.310916900634766,
-      "learning_rate": 1.0264462809917357e-05,
-      "loss": 0.0131,
-      "step": 690
-    },
-    {
-      "epoch": 5.34351145038168,
-      "grad_norm": 0.09134263545274734,
-      "learning_rate": 1.0099173553719008e-05,
-      "loss": 0.0428,
-      "step": 700
-    },
-    {
-      "epoch": 5.419847328244275,
-      "grad_norm": 0.08655811846256256,
-      "learning_rate": 9.933884297520661e-06,
-      "loss": 0.0246,
-      "step": 710
-    },
-    {
-      "epoch": 5.4961832061068705,
-      "grad_norm": 0.16410402953624725,
-      "learning_rate": 9.768595041322316e-06,
-      "loss": 0.0094,
-      "step": 720
-    },
-    {
-      "epoch": 5.572519083969466,
-      "grad_norm": 1.349546194076538,
-      "learning_rate": 9.603305785123967e-06,
-      "loss": 0.045,
-      "step": 730
-    },
-    {
-      "epoch": 5.648854961832061,
-      "grad_norm": 0.12257255613803864,
-      "learning_rate": 9.438016528925621e-06,
-      "loss": 0.0516,
-      "step": 740
-    },
-    {
-      "epoch": 5.7251908396946565,
-      "grad_norm": 0.06910885125398636,
-      "learning_rate": 9.272727272727273e-06,
-      "loss": 0.009,
-      "step": 750
-    },
-    {
-      "epoch": 5.801526717557252,
-      "grad_norm": 0.056132227182388306,
-      "learning_rate": 9.107438016528927e-06,
-      "loss": 0.0107,
-      "step": 760
-    },
-    {
-      "epoch": 5.877862595419847,
-      "grad_norm": 0.07667958736419678,
-      "learning_rate": 8.942148760330578e-06,
-      "loss": 0.0086,
-      "step": 770
-    },
-    {
-      "epoch": 5.9541984732824424,
-      "grad_norm": 0.10609736293554306,
-      "learning_rate": 8.776859504132233e-06,
-      "loss": 0.0085,
-      "step": 780
-    },
-    {
-      "epoch": 6.0,
-      "eval_accuracy": 0.9788867562380038,
-      "eval_f1": 0.9789720270641704,
-      "eval_loss": 0.12155096977949142,
-      "eval_precision": 0.9801130700504813,
-      "eval_recall": 0.9788867562380038,
-      "eval_runtime": 3.2955,
-      "eval_samples_per_second": 158.096,
-      "eval_steps_per_second": 10.014,
-      "step": 786
-    },
-    {
-      "epoch": 6.030534351145038,
-      "grad_norm": 0.06408526748418808,
-      "learning_rate": 8.611570247933884e-06,
-      "loss": 0.0081,
-      "step": 790
-    },
-    {
-      "epoch": 6.106870229007634,
-      "grad_norm": 0.07884930074214935,
-      "learning_rate": 8.446280991735539e-06,
-      "loss": 0.031,
-      "step": 800
-    },
-    {
-      "epoch": 6.183206106870229,
-      "grad_norm": 0.07998275011777878,
-      "learning_rate": 8.28099173553719e-06,
-      "loss": 0.0453,
-      "step": 810
-    },
-    {
-      "epoch": 6.259541984732825,
-      "grad_norm": 0.22578206658363342,
-      "learning_rate": 8.115702479338843e-06,
-      "loss": 0.0078,
-      "step": 820
-    },
-    {
-      "epoch": 6.33587786259542,
-      "grad_norm": 0.07642875611782074,
-      "learning_rate": 7.950413223140496e-06,
-      "loss": 0.0086,
-      "step": 830
-    },
-    {
-      "epoch": 6.412213740458015,
-      "grad_norm": 0.10305721312761307,
-      "learning_rate": 7.785123966942149e-06,
-      "loss": 0.0444,
-      "step": 840
-    },
-    {
-      "epoch": 6.488549618320611,
-      "grad_norm": 0.0701122134923935,
-      "learning_rate": 7.619834710743802e-06,
-      "loss": 0.0077,
-      "step": 850
-    },
-    {
-      "epoch": 6.564885496183206,
-      "grad_norm": 0.07119292765855789,
-      "learning_rate": 7.454545454545456e-06,
-      "loss": 0.0076,
-      "step": 860
-    },
-    {
-      "epoch": 6.641221374045801,
-      "grad_norm": 0.2685672342777252,
-      "learning_rate": 7.289256198347108e-06,
-      "loss": 0.0077,
-      "step": 870
-    },
-    {
-      "epoch": 6.717557251908397,
-      "grad_norm": 0.0628926083445549,
-      "learning_rate": 7.1239669421487615e-06,
-      "loss": 0.0072,
-      "step": 880
-    },
-    {
-      "epoch": 6.793893129770993,
-      "grad_norm": 0.06299301236867905,
-      "learning_rate": 6.9586776859504135e-06,
-      "loss": 0.0109,
-      "step": 890
-    },
-    {
-      "epoch": 6.870229007633588,
-      "grad_norm": 0.06120818480849266,
-      "learning_rate": 6.793388429752067e-06,
-      "loss": 0.0069,
-      "step": 900
-    },
-    {
-      "epoch": 6.9465648854961835,
-      "grad_norm": 0.08700945228338242,
-      "learning_rate": 6.628099173553719e-06,
-      "loss": 0.0073,
-      "step": 910
-    },
-    {
-      "epoch": 7.0,
-      "eval_accuracy": 0.980806142034549,
-      "eval_f1": 0.980791368968265,
-      "eval_loss": 0.11875477433204651,
-      "eval_precision": 0.981936841149893,
-      "eval_recall": 0.980806142034549,
-      "eval_runtime": 3.1069,
-      "eval_samples_per_second": 167.692,
-      "eval_steps_per_second": 10.622,
-      "step": 917
-    },
-    {
-      "epoch": 7.022900763358779,
-      "grad_norm": 0.061511170119047165,
-      "learning_rate": 6.462809917355372e-06,
-      "loss": 0.0066,
-      "step": 920
-    },
-    {
-      "epoch": 7.099236641221374,
-      "grad_norm": 0.06128810718655586,
-      "learning_rate": 6.297520661157025e-06,
-      "loss": 0.0064,
-      "step": 930
-    },
-    {
-      "epoch": 7.175572519083969,
-      "grad_norm": 0.05454257130622864,
-      "learning_rate": 6.132231404958678e-06,
-      "loss": 0.0066,
-      "step": 940
-    },
-    {
-      "epoch": 7.251908396946565,
-      "grad_norm": 0.09356739372015,
-      "learning_rate": 5.966942148760331e-06,
-      "loss": 0.0065,
-      "step": 950
-    },
-    {
-      "epoch": 7.32824427480916,
-      "grad_norm": 0.04699549078941345,
-      "learning_rate": 5.801652892561984e-06,
-      "loss": 0.006,
-      "step": 960
-    },
-    {
-      "epoch": 7.404580152671755,
-      "grad_norm": 0.04597270488739014,
-      "learning_rate": 5.636363636363636e-06,
-      "loss": 0.0063,
-      "step": 970
-    },
-    {
-      "epoch": 7.480916030534351,
-      "grad_norm": 0.05777190253138542,
-      "learning_rate": 5.47107438016529e-06,
-      "loss": 0.0057,
-      "step": 980
-    },
-    {
-      "epoch": 7.557251908396947,
-      "grad_norm": 0.0520237572491169,
-      "learning_rate": 5.305785123966942e-06,
-      "loss": 0.006,
-      "step": 990
-    },
-    {
-      "epoch": 7.633587786259542,
-      "grad_norm": 0.0427822545170784,
-      "learning_rate": 5.140495867768596e-06,
-      "loss": 0.0059,
-      "step": 1000
-    },
-    {
-      "epoch": 7.709923664122138,
-      "grad_norm": 0.05699237063527107,
-      "learning_rate": 4.975206611570249e-06,
-      "loss": 0.0055,
-      "step": 1010
-    },
-    {
-      "epoch": 7.786259541984733,
-      "grad_norm": 0.05885695666074753,
-      "learning_rate": 4.8099173553719015e-06,
-      "loss": 0.0258,
-      "step": 1020
-    },
-    {
-      "epoch": 7.862595419847328,
-      "grad_norm": 0.05190462991595268,
-      "learning_rate": 4.6446280991735544e-06,
-      "loss": 0.0496,
-      "step": 1030
-    },
-    {
-      "epoch": 7.938931297709924,
-      "grad_norm": 0.03909669816493988,
-      "learning_rate": 4.479338842975207e-06,
-      "loss": 0.0398,
-      "step": 1040
-    },
-    {
-      "epoch": 8.0,
-      "eval_accuracy": 0.980806142034549,
-      "eval_f1": 0.980791368968265,
-      "eval_loss": 0.12089628726243973,
-      "eval_precision": 0.981936841149893,
-      "eval_recall": 0.980806142034549,
-      "eval_runtime": 3.1129,
-      "eval_samples_per_second": 167.366,
-      "eval_steps_per_second": 10.601,
-      "step": 1048
-    },
-    {
-      "epoch": 8.01526717557252,
-      "grad_norm": 0.05181822180747986,
-      "learning_rate": 4.31404958677686e-06,
-      "loss": 0.0062,
-      "step": 1050
-    },
-    {
-      "epoch": 8.091603053435115,
-      "grad_norm": 0.03777517005801201,
-      "learning_rate": 4.148760330578513e-06,
-      "loss": 0.0058,
-      "step": 1060
-    },
-    {
-      "epoch": 8.16793893129771,
-      "grad_norm": 0.04515732452273369,
-      "learning_rate": 3.983471074380166e-06,
-      "loss": 0.0056,
-      "step": 1070
-    },
-    {
-      "epoch": 8.244274809160306,
-      "grad_norm": 0.044928282499313354,
-      "learning_rate": 3.818181818181819e-06,
-      "loss": 0.0055,
-      "step": 1080
-    },
-    {
-      "epoch": 8.320610687022901,
-      "grad_norm": 0.05599347501993179,
-      "learning_rate": 3.6528925619834715e-06,
-      "loss": 0.0057,
-      "step": 1090
-    },
-    {
-      "epoch": 8.396946564885496,
-      "grad_norm": 1.0466651916503906,
-      "learning_rate": 3.4876033057851245e-06,
-      "loss": 0.0384,
-      "step": 1100
-    },
-    {
-      "epoch": 8.473282442748092,
-      "grad_norm": 0.05839056894183159,
-      "learning_rate": 3.3223140495867774e-06,
-      "loss": 0.0057,
-      "step": 1110
-    },
-    {
-      "epoch": 8.549618320610687,
-      "grad_norm": 0.05969908460974693,
-      "learning_rate": 3.1570247933884303e-06,
-      "loss": 0.0424,
-      "step": 1120
-    },
-    {
-      "epoch": 8.625954198473282,
-      "grad_norm": 0.06252706795930862,
-      "learning_rate": 2.9917355371900832e-06,
-      "loss": 0.0174,
-      "step": 1130
-    },
-    {
-      "epoch": 8.702290076335878,
-      "grad_norm": 0.1538064330816269,
-      "learning_rate": 2.8264462809917357e-06,
-      "loss": 0.0058,
-      "step": 1140
-    },
-    {
-      "epoch": 8.778625954198473,
-      "grad_norm": 0.05743182823061943,
-      "learning_rate": 2.6611570247933886e-06,
-      "loss": 0.0055,
-      "step": 1150
-    },
-    {
-      "epoch": 8.854961832061068,
-      "grad_norm": 0.06665431708097458,
-      "learning_rate": 2.4958677685950416e-06,
-      "loss": 0.0057,
-      "step": 1160
-    },
-    {
-      "epoch": 8.931297709923664,
-      "grad_norm": 0.07899218052625656,
-      "learning_rate": 2.3305785123966945e-06,
-      "loss": 0.0055,
-      "step": 1170
-    },
-    {
-      "epoch": 9.0,
-      "eval_accuracy": 0.980806142034549,
-      "eval_f1": 0.9807999304274743,
-      "eval_loss": 0.12244618684053421,
-      "eval_precision": 0.9819563131797131,
-      "eval_recall": 0.980806142034549,
-      "eval_runtime": 3.1404,
-      "eval_samples_per_second": 165.902,
-      "eval_steps_per_second": 10.508,
-      "step": 1179
-    },
-    {
-      "epoch": 9.007633587786259,
-      "grad_norm": 0.03778070956468582,
-      "learning_rate": 2.1652892561983474e-06,
-      "loss": 0.0054,
-      "step": 1180
-    },
-    {
-      "epoch": 9.083969465648854,
-      "grad_norm": 0.0397595539689064,
-      "learning_rate": 2.0000000000000003e-06,
-      "loss": 0.0074,
-      "step": 1190
-    },
-    {
-      "epoch": 9.16030534351145,
-      "grad_norm": 0.03877999261021614,
-      "learning_rate": 1.8347107438016533e-06,
-      "loss": 0.0053,
-      "step": 1200
-    },
-    {
-      "epoch": 9.236641221374045,
-      "grad_norm": 0.05299071595072746,
-      "learning_rate": 1.669421487603306e-06,
-      "loss": 0.0052,
-      "step": 1210
-    },
-    {
-      "epoch": 9.312977099236642,
-      "grad_norm": 0.08447615802288055,
-      "learning_rate": 1.5041322314049589e-06,
-      "loss": 0.0053,
-      "step": 1220
-    },
-    {
-      "epoch": 9.389312977099237,
-      "grad_norm": 0.05665115639567375,
-      "learning_rate": 1.3388429752066118e-06,
-      "loss": 0.0053,
-      "step": 1230
-    },
-    {
-      "epoch": 9.465648854961833,
-      "grad_norm": 0.04538382589817047,
-      "learning_rate": 1.1735537190082645e-06,
-      "loss": 0.0055,
-      "step": 1240
-    },
-    {
-      "epoch": 9.541984732824428,
-      "grad_norm": 0.04416332021355629,
-      "learning_rate": 1.0082644628099174e-06,
-      "loss": 0.0051,
-      "step": 1250
-    },
-    {
-      "epoch": 9.618320610687023,
-      "grad_norm": 0.03218664601445198,
-      "learning_rate": 8.429752066115703e-07,
-      "loss": 0.0054,
-      "step": 1260
-    },
-    {
-      "epoch": 9.694656488549619,
-      "grad_norm": 0.05276583135128021,
-      "learning_rate": 6.776859504132232e-07,
-      "loss": 0.0052,
-      "step": 1270
-    },
-    {
-      "epoch": 9.770992366412214,
-      "grad_norm": 0.03935326635837555,
-      "learning_rate": 5.123966942148761e-07,
-      "loss": 0.0057,
-      "step": 1280
-    },
-    {
-      "epoch": 9.84732824427481,
-      "grad_norm": 0.05612946301698685,
-      "learning_rate": 3.4710743801652896e-07,
-      "loss": 0.0353,
-      "step": 1290
-    },
-    {
-      "epoch": 9.923664122137405,
-      "grad_norm": 0.03918612375855446,
-      "learning_rate": 1.8181818181818183e-07,
-      "loss": 0.005,
-      "step": 1300
-    },
-    {
-      "epoch": 10.0,
-      "grad_norm": 0.12464316934347153,
-      "learning_rate": 1.652892561983471e-08,
-      "loss": 0.0425,
-      "step": 1310
-    },
-    {
-      "epoch": 10.0,
-      "eval_accuracy": 0.982725527831094,
-      "eval_f1": 0.9827168072229782,
-      "eval_loss": 0.12131477892398834,
-      "eval_precision": 0.9837922474198865,
-      "eval_recall": 0.982725527831094,
-      "eval_runtime": 3.118,
-      "eval_samples_per_second": 167.095,
-      "eval_steps_per_second": 10.584,
-      "step": 1310
-    }
-  ],
-  "logging_steps": 10,
-  "max_steps": 1310,
-  "num_input_tokens_seen": 0,
-  "num_train_epochs": 10,
-  "save_steps": 500,
-  "stateful_callbacks": {
-    "EarlyStoppingCallback": {
-      "args": {
-        "early_stopping_patience": 3,
-        "early_stopping_threshold": 0.0
-      },
-      "attributes": {
-        "early_stopping_patience_counter": 0
-      }
-    },
-    "TrainerControl": {
-      "args": {
-        "should_epoch_stop": false,
-        "should_evaluate": false,
-        "should_log": false,
-        "should_save": true,
-        "should_training_stop": true
-      },
-      "attributes": {}
-    }
-  },
-  "total_flos": 1370372257351680.0,
-  "train_batch_size": 16,
-  "trial_name": null,
-  "trial_params": null
-}

checkpoint-1310/training_args.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:21a4ac2a72f23cb69080a4fb3a9a3266e6a76062c2c55904cad3d4237f62c83e
-size 5841