selmamalak committed
Commit bc4804e · verified · 1 Parent(s): c89d314

End of training

Files changed (5)
  1. README.md +5 -5
  2. all_results.json +16 -0
  3. eval_results.json +11 -0
  4. train_results.json +8 -0
  5. trainer_state.json +1676 -0
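
Note: the checkpoint path recorded in trainer_state.json ("deit-base-patch16-224-finetuned-lora-medmnistv2") indicates this is a LoRA adapter trained on top of facebook/deit-base-patch16-224. A minimal, non-authoritative sketch of loading such an adapter for inference with transformers + peft is shown below; the adapter repo id and label count are assumptions, not stated anywhere in this commit.

```python
# Hedged sketch: load the LoRA adapter on top of the base DeiT checkpoint.
# The adapter repo id and NUM_LABELS are placeholders -- adjust to the actual
# repository and the dataset's class count.
from transformers import AutoImageProcessor, AutoModelForImageClassification
from peft import PeftModel

BASE = "facebook/deit-base-patch16-224"
ADAPTER = "selmamalak/deit-base-patch16-224-finetuned-lora-medmnistv2"  # assumed repo id
NUM_LABELS = 7  # placeholder: must match the fine-tuned classification head

processor = AutoImageProcessor.from_pretrained(BASE)
base_model = AutoModelForImageClassification.from_pretrained(
    BASE, num_labels=NUM_LABELS, ignore_mismatched_sizes=True
)
model = PeftModel.from_pretrained(base_model, ADAPTER)
model.eval()
```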
README.md CHANGED
@@ -23,11 +23,11 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [facebook/deit-base-patch16-224](https://huggingface.co/facebook/deit-base-patch16-224) on the medmnist-v2 dataset.
 It achieves the following results on the evaluation set:
- - Loss: 0.2558
- - Accuracy: 0.8997
- - Precision: 0.8463
- - Recall: 0.8395
- - F1: 0.8416
+ - Loss: 0.4815
+ - Accuracy: 0.8080
+ - Precision: 0.7703
+ - Recall: 0.7686
+ - F1: 0.7650
 
 ## Model description
 
all_results.json ADDED
@@ -0,0 +1,16 @@
+ {
+ "epoch": 10.0,
+ "eval_accuracy": 0.8080190282025145,
+ "eval_f1": 0.7649840068771034,
+ "eval_loss": 0.4814508855342865,
+ "eval_precision": 0.7703206524276631,
+ "eval_recall": 0.7685966768079108,
+ "eval_runtime": 72.3887,
+ "eval_samples_per_second": 121.967,
+ "eval_steps_per_second": 7.625,
+ "total_flos": 1.0878579515820442e+19,
+ "train_loss": 0.69453261346992,
+ "train_runtime": 2383.9912,
+ "train_samples_per_second": 58.473,
+ "train_steps_per_second": 0.914
+ }
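
The eval_accuracy / eval_precision / eval_recall / eval_f1 values above (and in the README diff) are the kind of dictionary a `compute_metrics` callback returns from `Trainer.evaluate()`. A minimal sketch of such a callback, assuming macro-averaged scores from scikit-learn (the averaging actually used for this run is not recorded in the commit):

```python
# Hedged sketch of a Trainer-style compute_metrics callback; macro averaging
# is an assumption, not confirmed by this commit.
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average="macro", zero_division=0
    )
    return {
        "accuracy": accuracy_score(labels, preds),
        "precision": precision,
        "recall": recall,
        "f1": f1,
    }
```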
eval_results.json ADDED
@@ -0,0 +1,11 @@
+ {
+ "epoch": 10.0,
+ "eval_accuracy": 0.8080190282025145,
+ "eval_f1": 0.7649840068771034,
+ "eval_loss": 0.4814508855342865,
+ "eval_precision": 0.7703206524276631,
+ "eval_recall": 0.7685966768079108,
+ "eval_runtime": 72.3887,
+ "eval_samples_per_second": 121.967,
+ "eval_steps_per_second": 7.625
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 10.0,
+ "total_flos": 1.0878579515820442e+19,
+ "train_loss": 0.69453261346992,
+ "train_runtime": 2383.9912,
+ "train_samples_per_second": 58.473,
+ "train_steps_per_second": 0.914
+ }
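
train_results.json and eval_results.json are the same end-of-run summaries split by phase (all_results.json is their union). A small sketch for loading them locally, with paths assumed relative to the repository root:

```python
# Sketch: load the per-phase summary files written by Trainer at the end of training.
import json
from pathlib import Path

summaries = {p.stem: json.loads(p.read_text())
             for p in map(Path, ["train_results.json", "eval_results.json"])}

print("train loss:", summaries["train_results"]["train_loss"])
print("eval accuracy:", summaries["eval_results"]["eval_accuracy"])
print("eval throughput:",
      summaries["eval_results"]["eval_samples_per_second"], "samples/s")
```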
trainer_state.json ADDED
@@ -0,0 +1,1676 @@
1
+ {
2
+ "best_metric": 0.9061990212071778,
3
+ "best_model_checkpoint": "deit-base-patch16-224-finetuned-lora-medmnistv2/checkpoint-1526",
4
+ "epoch": 10.0,
5
+ "eval_steps": 500,
6
+ "global_step": 2180,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.05,
13
+ "grad_norm": 3.7138895988464355,
14
+ "learning_rate": 0.004977064220183487,
15
+ "loss": 1.9548,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.09,
20
+ "grad_norm": 3.300619125366211,
21
+ "learning_rate": 0.004954128440366973,
22
+ "loss": 1.5287,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.14,
27
+ "grad_norm": 2.6508607864379883,
28
+ "learning_rate": 0.004931192660550459,
29
+ "loss": 1.3837,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.18,
34
+ "grad_norm": 2.1517019271850586,
35
+ "learning_rate": 0.004908256880733945,
36
+ "loss": 1.2375,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.23,
41
+ "grad_norm": 1.7724459171295166,
42
+ "learning_rate": 0.004885321100917431,
43
+ "loss": 1.2391,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.28,
48
+ "grad_norm": 2.2682437896728516,
49
+ "learning_rate": 0.004862385321100918,
50
+ "loss": 1.1129,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.32,
55
+ "grad_norm": 1.4531511068344116,
56
+ "learning_rate": 0.004839449541284404,
57
+ "loss": 1.1138,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.37,
62
+ "grad_norm": 2.0359365940093994,
63
+ "learning_rate": 0.00481651376146789,
64
+ "loss": 1.0654,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.41,
69
+ "grad_norm": 1.8885788917541504,
70
+ "learning_rate": 0.004793577981651377,
71
+ "loss": 1.1075,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.46,
76
+ "grad_norm": 2.3016016483306885,
77
+ "learning_rate": 0.0047706422018348625,
78
+ "loss": 1.075,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.5,
83
+ "grad_norm": 2.251934289932251,
84
+ "learning_rate": 0.004747706422018348,
85
+ "loss": 1.0396,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.55,
90
+ "grad_norm": 1.179795503616333,
91
+ "learning_rate": 0.004724770642201835,
92
+ "loss": 1.036,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.6,
97
+ "grad_norm": 1.6593384742736816,
98
+ "learning_rate": 0.004701834862385321,
99
+ "loss": 1.0118,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.64,
104
+ "grad_norm": 2.6405937671661377,
105
+ "learning_rate": 0.004678899082568808,
106
+ "loss": 0.9888,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.69,
111
+ "grad_norm": 1.8541159629821777,
112
+ "learning_rate": 0.004655963302752294,
113
+ "loss": 0.9314,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.73,
118
+ "grad_norm": 1.5319859981536865,
119
+ "learning_rate": 0.00463302752293578,
120
+ "loss": 0.9473,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.78,
125
+ "grad_norm": 1.5615304708480835,
126
+ "learning_rate": 0.004610091743119266,
127
+ "loss": 0.9362,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.83,
132
+ "grad_norm": 1.7751407623291016,
133
+ "learning_rate": 0.0045871559633027525,
134
+ "loss": 1.0674,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.87,
139
+ "grad_norm": 1.6211912631988525,
140
+ "learning_rate": 0.004564220183486238,
141
+ "loss": 1.0766,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.92,
146
+ "grad_norm": 2.1214470863342285,
147
+ "learning_rate": 0.004541284403669725,
148
+ "loss": 0.9675,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.96,
153
+ "grad_norm": 2.0745882987976074,
154
+ "learning_rate": 0.004518348623853211,
155
+ "loss": 0.9804,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 1.0,
160
+ "eval_accuracy": 0.7243066884176182,
161
+ "eval_f1": 0.6426465488409587,
162
+ "eval_loss": 0.6885228753089905,
163
+ "eval_precision": 0.7882942358945201,
164
+ "eval_recall": 0.6660603565571163,
165
+ "eval_runtime": 20.0858,
166
+ "eval_samples_per_second": 122.076,
167
+ "eval_steps_per_second": 7.667,
168
+ "step": 218
169
+ },
170
+ {
171
+ "epoch": 1.01,
172
+ "grad_norm": 1.6901938915252686,
173
+ "learning_rate": 0.004495412844036698,
174
+ "loss": 1.0215,
175
+ "step": 220
176
+ },
177
+ {
178
+ "epoch": 1.06,
179
+ "grad_norm": 1.575714111328125,
180
+ "learning_rate": 0.004472477064220184,
181
+ "loss": 0.8812,
182
+ "step": 230
183
+ },
184
+ {
185
+ "epoch": 1.1,
186
+ "grad_norm": 2.2109575271606445,
187
+ "learning_rate": 0.0044495412844036695,
188
+ "loss": 0.8834,
189
+ "step": 240
190
+ },
191
+ {
192
+ "epoch": 1.15,
193
+ "grad_norm": 1.79192316532135,
194
+ "learning_rate": 0.004426605504587156,
195
+ "loss": 0.8381,
196
+ "step": 250
197
+ },
198
+ {
199
+ "epoch": 1.19,
200
+ "grad_norm": 2.0963821411132812,
201
+ "learning_rate": 0.004403669724770643,
202
+ "loss": 1.0578,
203
+ "step": 260
204
+ },
205
+ {
206
+ "epoch": 1.24,
207
+ "grad_norm": 1.5217491388320923,
208
+ "learning_rate": 0.004380733944954128,
209
+ "loss": 0.9884,
210
+ "step": 270
211
+ },
212
+ {
213
+ "epoch": 1.28,
214
+ "grad_norm": 1.9401984214782715,
215
+ "learning_rate": 0.004357798165137615,
216
+ "loss": 0.9571,
217
+ "step": 280
218
+ },
219
+ {
220
+ "epoch": 1.33,
221
+ "grad_norm": 2.2098371982574463,
222
+ "learning_rate": 0.0043348623853211015,
223
+ "loss": 0.8079,
224
+ "step": 290
225
+ },
226
+ {
227
+ "epoch": 1.38,
228
+ "grad_norm": 1.31866455078125,
229
+ "learning_rate": 0.004311926605504587,
230
+ "loss": 0.9675,
231
+ "step": 300
232
+ },
233
+ {
234
+ "epoch": 1.42,
235
+ "grad_norm": 1.6946964263916016,
236
+ "learning_rate": 0.004288990825688073,
237
+ "loss": 0.9153,
238
+ "step": 310
239
+ },
240
+ {
241
+ "epoch": 1.47,
242
+ "grad_norm": 1.3618961572647095,
243
+ "learning_rate": 0.0042660550458715595,
244
+ "loss": 0.9402,
245
+ "step": 320
246
+ },
247
+ {
248
+ "epoch": 1.51,
249
+ "grad_norm": 1.3714025020599365,
250
+ "learning_rate": 0.004243119266055046,
251
+ "loss": 0.9622,
252
+ "step": 330
253
+ },
254
+ {
255
+ "epoch": 1.56,
256
+ "grad_norm": 1.4716968536376953,
257
+ "learning_rate": 0.004220183486238533,
258
+ "loss": 0.9493,
259
+ "step": 340
260
+ },
261
+ {
262
+ "epoch": 1.61,
263
+ "grad_norm": 1.8330761194229126,
264
+ "learning_rate": 0.004197247706422018,
265
+ "loss": 0.8914,
266
+ "step": 350
267
+ },
268
+ {
269
+ "epoch": 1.65,
270
+ "grad_norm": 1.2121117115020752,
271
+ "learning_rate": 0.004174311926605505,
272
+ "loss": 0.9925,
273
+ "step": 360
274
+ },
275
+ {
276
+ "epoch": 1.7,
277
+ "grad_norm": 1.6328595876693726,
278
+ "learning_rate": 0.004151376146788991,
279
+ "loss": 0.8127,
280
+ "step": 370
281
+ },
282
+ {
283
+ "epoch": 1.74,
284
+ "grad_norm": 1.02108895778656,
285
+ "learning_rate": 0.004128440366972477,
286
+ "loss": 0.8554,
287
+ "step": 380
288
+ },
289
+ {
290
+ "epoch": 1.79,
291
+ "grad_norm": 1.5831329822540283,
292
+ "learning_rate": 0.004105504587155963,
293
+ "loss": 0.8917,
294
+ "step": 390
295
+ },
296
+ {
297
+ "epoch": 1.83,
298
+ "grad_norm": 1.4080381393432617,
299
+ "learning_rate": 0.00408256880733945,
300
+ "loss": 0.8993,
301
+ "step": 400
302
+ },
303
+ {
304
+ "epoch": 1.88,
305
+ "grad_norm": 1.5948288440704346,
306
+ "learning_rate": 0.004059633027522936,
307
+ "loss": 0.8667,
308
+ "step": 410
309
+ },
310
+ {
311
+ "epoch": 1.93,
312
+ "grad_norm": 1.1762131452560425,
313
+ "learning_rate": 0.004036697247706422,
314
+ "loss": 0.8155,
315
+ "step": 420
316
+ },
317
+ {
318
+ "epoch": 1.97,
319
+ "grad_norm": 1.6054571866989136,
320
+ "learning_rate": 0.0040137614678899085,
321
+ "loss": 0.9277,
322
+ "step": 430
323
+ },
324
+ {
325
+ "epoch": 2.0,
326
+ "eval_accuracy": 0.850326264274062,
327
+ "eval_f1": 0.7680457132974591,
328
+ "eval_loss": 0.35129514336586,
329
+ "eval_precision": 0.7635420667300096,
330
+ "eval_recall": 0.7943474051999218,
331
+ "eval_runtime": 20.1251,
332
+ "eval_samples_per_second": 121.838,
333
+ "eval_steps_per_second": 7.652,
334
+ "step": 436
335
+ },
336
+ {
337
+ "epoch": 2.02,
338
+ "grad_norm": 1.8250489234924316,
339
+ "learning_rate": 0.003990825688073394,
340
+ "loss": 0.8701,
341
+ "step": 440
342
+ },
343
+ {
344
+ "epoch": 2.06,
345
+ "grad_norm": 1.3763368129730225,
346
+ "learning_rate": 0.003967889908256881,
347
+ "loss": 0.8898,
348
+ "step": 450
349
+ },
350
+ {
351
+ "epoch": 2.11,
352
+ "grad_norm": 1.257175326347351,
353
+ "learning_rate": 0.003944954128440367,
354
+ "loss": 0.8269,
355
+ "step": 460
356
+ },
357
+ {
358
+ "epoch": 2.16,
359
+ "grad_norm": 1.4226521253585815,
360
+ "learning_rate": 0.003922018348623853,
361
+ "loss": 0.7581,
362
+ "step": 470
363
+ },
364
+ {
365
+ "epoch": 2.2,
366
+ "grad_norm": 1.8739672899246216,
367
+ "learning_rate": 0.0038990825688073397,
368
+ "loss": 0.8385,
369
+ "step": 480
370
+ },
371
+ {
372
+ "epoch": 2.25,
373
+ "grad_norm": 1.9286770820617676,
374
+ "learning_rate": 0.003876146788990826,
375
+ "loss": 0.8155,
376
+ "step": 490
377
+ },
378
+ {
379
+ "epoch": 2.29,
380
+ "grad_norm": 1.2533843517303467,
381
+ "learning_rate": 0.0038532110091743124,
382
+ "loss": 0.7766,
383
+ "step": 500
384
+ },
385
+ {
386
+ "epoch": 2.34,
387
+ "grad_norm": 1.5577070713043213,
388
+ "learning_rate": 0.003830275229357798,
389
+ "loss": 0.7792,
390
+ "step": 510
391
+ },
392
+ {
393
+ "epoch": 2.39,
394
+ "grad_norm": 1.9237123727798462,
395
+ "learning_rate": 0.0038073394495412843,
396
+ "loss": 0.8341,
397
+ "step": 520
398
+ },
399
+ {
400
+ "epoch": 2.43,
401
+ "grad_norm": 1.455471158027649,
402
+ "learning_rate": 0.003784403669724771,
403
+ "loss": 0.8508,
404
+ "step": 530
405
+ },
406
+ {
407
+ "epoch": 2.48,
408
+ "grad_norm": 1.77620267868042,
409
+ "learning_rate": 0.003761467889908257,
410
+ "loss": 0.8548,
411
+ "step": 540
412
+ },
413
+ {
414
+ "epoch": 2.52,
415
+ "grad_norm": 1.7046033143997192,
416
+ "learning_rate": 0.003738532110091743,
417
+ "loss": 0.7772,
418
+ "step": 550
419
+ },
420
+ {
421
+ "epoch": 2.57,
422
+ "grad_norm": 1.554868459701538,
423
+ "learning_rate": 0.0037155963302752293,
424
+ "loss": 0.7189,
425
+ "step": 560
426
+ },
427
+ {
428
+ "epoch": 2.61,
429
+ "grad_norm": 1.6947243213653564,
430
+ "learning_rate": 0.003692660550458716,
431
+ "loss": 0.8345,
432
+ "step": 570
433
+ },
434
+ {
435
+ "epoch": 2.66,
436
+ "grad_norm": 1.3895587921142578,
437
+ "learning_rate": 0.003669724770642202,
438
+ "loss": 0.758,
439
+ "step": 580
440
+ },
441
+ {
442
+ "epoch": 2.71,
443
+ "grad_norm": 1.3375391960144043,
444
+ "learning_rate": 0.0036467889908256878,
445
+ "loss": 0.7893,
446
+ "step": 590
447
+ },
448
+ {
449
+ "epoch": 2.75,
450
+ "grad_norm": 2.090715169906616,
451
+ "learning_rate": 0.0036238532110091743,
452
+ "loss": 0.795,
453
+ "step": 600
454
+ },
455
+ {
456
+ "epoch": 2.8,
457
+ "grad_norm": 1.4841378927230835,
458
+ "learning_rate": 0.0036009174311926605,
459
+ "loss": 0.8813,
460
+ "step": 610
461
+ },
462
+ {
463
+ "epoch": 2.84,
464
+ "grad_norm": 1.7425097227096558,
465
+ "learning_rate": 0.003577981651376147,
466
+ "loss": 0.8691,
467
+ "step": 620
468
+ },
469
+ {
470
+ "epoch": 2.89,
471
+ "grad_norm": 1.592509388923645,
472
+ "learning_rate": 0.003555045871559633,
473
+ "loss": 0.6923,
474
+ "step": 630
475
+ },
476
+ {
477
+ "epoch": 2.94,
478
+ "grad_norm": 1.178277611732483,
479
+ "learning_rate": 0.0035321100917431194,
480
+ "loss": 0.8297,
481
+ "step": 640
482
+ },
483
+ {
484
+ "epoch": 2.98,
485
+ "grad_norm": 1.2316228151321411,
486
+ "learning_rate": 0.0035091743119266055,
487
+ "loss": 0.8144,
488
+ "step": 650
489
+ },
490
+ {
491
+ "epoch": 3.0,
492
+ "eval_accuracy": 0.8544045676998369,
493
+ "eval_f1": 0.790941601757907,
494
+ "eval_loss": 0.36142271757125854,
495
+ "eval_precision": 0.8330895565628892,
496
+ "eval_recall": 0.7960896326958813,
497
+ "eval_runtime": 20.0968,
498
+ "eval_samples_per_second": 122.01,
499
+ "eval_steps_per_second": 7.663,
500
+ "step": 654
501
+ },
502
+ {
503
+ "epoch": 3.03,
504
+ "grad_norm": 1.9384276866912842,
505
+ "learning_rate": 0.003486238532110092,
506
+ "loss": 0.7581,
507
+ "step": 660
508
+ },
509
+ {
510
+ "epoch": 3.07,
511
+ "grad_norm": 1.494698405265808,
512
+ "learning_rate": 0.003463302752293578,
513
+ "loss": 0.7494,
514
+ "step": 670
515
+ },
516
+ {
517
+ "epoch": 3.12,
518
+ "grad_norm": 1.741979956626892,
519
+ "learning_rate": 0.0034403669724770644,
520
+ "loss": 0.6721,
521
+ "step": 680
522
+ },
523
+ {
524
+ "epoch": 3.17,
525
+ "grad_norm": 1.857740879058838,
526
+ "learning_rate": 0.0034174311926605506,
527
+ "loss": 0.7034,
528
+ "step": 690
529
+ },
530
+ {
531
+ "epoch": 3.21,
532
+ "grad_norm": 1.5749099254608154,
533
+ "learning_rate": 0.003394495412844037,
534
+ "loss": 0.7933,
535
+ "step": 700
536
+ },
537
+ {
538
+ "epoch": 3.26,
539
+ "grad_norm": 1.865283727645874,
540
+ "learning_rate": 0.003371559633027523,
541
+ "loss": 0.7655,
542
+ "step": 710
543
+ },
544
+ {
545
+ "epoch": 3.3,
546
+ "grad_norm": 2.0680484771728516,
547
+ "learning_rate": 0.003348623853211009,
548
+ "loss": 0.7182,
549
+ "step": 720
550
+ },
551
+ {
552
+ "epoch": 3.35,
553
+ "grad_norm": 1.419133186340332,
554
+ "learning_rate": 0.0033256880733944956,
555
+ "loss": 0.7587,
556
+ "step": 730
557
+ },
558
+ {
559
+ "epoch": 3.39,
560
+ "grad_norm": 1.6129292249679565,
561
+ "learning_rate": 0.0033027522935779817,
562
+ "loss": 0.6984,
563
+ "step": 740
564
+ },
565
+ {
566
+ "epoch": 3.44,
567
+ "grad_norm": 1.2033915519714355,
568
+ "learning_rate": 0.003279816513761468,
569
+ "loss": 0.8069,
570
+ "step": 750
571
+ },
572
+ {
573
+ "epoch": 3.49,
574
+ "grad_norm": 1.6685665845870972,
575
+ "learning_rate": 0.003256880733944954,
576
+ "loss": 0.7586,
577
+ "step": 760
578
+ },
579
+ {
580
+ "epoch": 3.53,
581
+ "grad_norm": 1.3577849864959717,
582
+ "learning_rate": 0.0032339449541284406,
583
+ "loss": 0.7577,
584
+ "step": 770
585
+ },
586
+ {
587
+ "epoch": 3.58,
588
+ "grad_norm": 1.4581727981567383,
589
+ "learning_rate": 0.003211009174311927,
590
+ "loss": 0.7977,
591
+ "step": 780
592
+ },
593
+ {
594
+ "epoch": 3.62,
595
+ "grad_norm": 1.547544240951538,
596
+ "learning_rate": 0.0031880733944954125,
597
+ "loss": 0.7738,
598
+ "step": 790
599
+ },
600
+ {
601
+ "epoch": 3.67,
602
+ "grad_norm": 1.6125229597091675,
603
+ "learning_rate": 0.003165137614678899,
604
+ "loss": 0.748,
605
+ "step": 800
606
+ },
607
+ {
608
+ "epoch": 3.72,
609
+ "grad_norm": 1.4292904138565063,
610
+ "learning_rate": 0.0031422018348623852,
611
+ "loss": 0.7275,
612
+ "step": 810
613
+ },
614
+ {
615
+ "epoch": 3.76,
616
+ "grad_norm": 1.8630807399749756,
617
+ "learning_rate": 0.003119266055045872,
618
+ "loss": 0.7082,
619
+ "step": 820
620
+ },
621
+ {
622
+ "epoch": 3.81,
623
+ "grad_norm": 1.2151238918304443,
624
+ "learning_rate": 0.0030963302752293575,
625
+ "loss": 0.7476,
626
+ "step": 830
627
+ },
628
+ {
629
+ "epoch": 3.85,
630
+ "grad_norm": 1.1003532409667969,
631
+ "learning_rate": 0.003073394495412844,
632
+ "loss": 0.6765,
633
+ "step": 840
634
+ },
635
+ {
636
+ "epoch": 3.9,
637
+ "grad_norm": 1.6830847263336182,
638
+ "learning_rate": 0.0030504587155963303,
639
+ "loss": 0.6833,
640
+ "step": 850
641
+ },
642
+ {
643
+ "epoch": 3.94,
644
+ "grad_norm": 1.3484947681427002,
645
+ "learning_rate": 0.003027522935779817,
646
+ "loss": 0.7556,
647
+ "step": 860
648
+ },
649
+ {
650
+ "epoch": 3.99,
651
+ "grad_norm": 1.2535579204559326,
652
+ "learning_rate": 0.0030045871559633026,
653
+ "loss": 0.7344,
654
+ "step": 870
655
+ },
656
+ {
657
+ "epoch": 4.0,
658
+ "eval_accuracy": 0.8609298531810766,
659
+ "eval_f1": 0.7885642715655152,
660
+ "eval_loss": 0.33706870675086975,
661
+ "eval_precision": 0.8326532721424038,
662
+ "eval_recall": 0.8017504308284955,
663
+ "eval_runtime": 20.1745,
664
+ "eval_samples_per_second": 121.54,
665
+ "eval_steps_per_second": 7.633,
666
+ "step": 872
667
+ },
668
+ {
669
+ "epoch": 4.04,
670
+ "grad_norm": 1.1233114004135132,
671
+ "learning_rate": 0.002981651376146789,
672
+ "loss": 0.6551,
673
+ "step": 880
674
+ },
675
+ {
676
+ "epoch": 4.08,
677
+ "grad_norm": 1.2706884145736694,
678
+ "learning_rate": 0.0029587155963302753,
679
+ "loss": 0.7096,
680
+ "step": 890
681
+ },
682
+ {
683
+ "epoch": 4.13,
684
+ "grad_norm": 1.4524619579315186,
685
+ "learning_rate": 0.002935779816513762,
686
+ "loss": 0.7413,
687
+ "step": 900
688
+ },
689
+ {
690
+ "epoch": 4.17,
691
+ "grad_norm": 1.3791077136993408,
692
+ "learning_rate": 0.0029128440366972476,
693
+ "loss": 0.7428,
694
+ "step": 910
695
+ },
696
+ {
697
+ "epoch": 4.22,
698
+ "grad_norm": 1.3151274919509888,
699
+ "learning_rate": 0.0028899082568807338,
700
+ "loss": 0.7053,
701
+ "step": 920
702
+ },
703
+ {
704
+ "epoch": 4.27,
705
+ "grad_norm": 1.2521573305130005,
706
+ "learning_rate": 0.0028669724770642203,
707
+ "loss": 0.6814,
708
+ "step": 930
709
+ },
710
+ {
711
+ "epoch": 4.31,
712
+ "grad_norm": 1.200779676437378,
713
+ "learning_rate": 0.0028440366972477065,
714
+ "loss": 0.6757,
715
+ "step": 940
716
+ },
717
+ {
718
+ "epoch": 4.36,
719
+ "grad_norm": 1.3665342330932617,
720
+ "learning_rate": 0.0028211009174311926,
721
+ "loss": 0.677,
722
+ "step": 950
723
+ },
724
+ {
725
+ "epoch": 4.4,
726
+ "grad_norm": 1.4855738878250122,
727
+ "learning_rate": 0.002798165137614679,
728
+ "loss": 0.6805,
729
+ "step": 960
730
+ },
731
+ {
732
+ "epoch": 4.45,
733
+ "grad_norm": 1.2765145301818848,
734
+ "learning_rate": 0.0027752293577981654,
735
+ "loss": 0.6568,
736
+ "step": 970
737
+ },
738
+ {
739
+ "epoch": 4.5,
740
+ "grad_norm": 1.2457036972045898,
741
+ "learning_rate": 0.0027522935779816515,
742
+ "loss": 0.7198,
743
+ "step": 980
744
+ },
745
+ {
746
+ "epoch": 4.54,
747
+ "grad_norm": 1.3267652988433838,
748
+ "learning_rate": 0.0027293577981651372,
749
+ "loss": 0.6578,
750
+ "step": 990
751
+ },
752
+ {
753
+ "epoch": 4.59,
754
+ "grad_norm": 1.409703016281128,
755
+ "learning_rate": 0.002706422018348624,
756
+ "loss": 0.695,
757
+ "step": 1000
758
+ },
759
+ {
760
+ "epoch": 4.63,
761
+ "grad_norm": 1.089101791381836,
762
+ "learning_rate": 0.00268348623853211,
763
+ "loss": 0.6934,
764
+ "step": 1010
765
+ },
766
+ {
767
+ "epoch": 4.68,
768
+ "grad_norm": 1.238553762435913,
769
+ "learning_rate": 0.0026605504587155966,
770
+ "loss": 0.6932,
771
+ "step": 1020
772
+ },
773
+ {
774
+ "epoch": 4.72,
775
+ "grad_norm": 1.3457752466201782,
776
+ "learning_rate": 0.0026376146788990823,
777
+ "loss": 0.7615,
778
+ "step": 1030
779
+ },
780
+ {
781
+ "epoch": 4.77,
782
+ "grad_norm": 1.3853940963745117,
783
+ "learning_rate": 0.002614678899082569,
784
+ "loss": 0.7032,
785
+ "step": 1040
786
+ },
787
+ {
788
+ "epoch": 4.82,
789
+ "grad_norm": 1.5760701894760132,
790
+ "learning_rate": 0.002591743119266055,
791
+ "loss": 0.684,
792
+ "step": 1050
793
+ },
794
+ {
795
+ "epoch": 4.86,
796
+ "grad_norm": 1.2746469974517822,
797
+ "learning_rate": 0.0025688073394495416,
798
+ "loss": 0.7168,
799
+ "step": 1060
800
+ },
801
+ {
802
+ "epoch": 4.91,
803
+ "grad_norm": 1.0423808097839355,
804
+ "learning_rate": 0.0025458715596330273,
805
+ "loss": 0.7036,
806
+ "step": 1070
807
+ },
808
+ {
809
+ "epoch": 4.95,
810
+ "grad_norm": 1.255771279335022,
811
+ "learning_rate": 0.0025229357798165135,
812
+ "loss": 0.7069,
813
+ "step": 1080
814
+ },
815
+ {
816
+ "epoch": 5.0,
817
+ "grad_norm": 1.7328591346740723,
818
+ "learning_rate": 0.0025,
819
+ "loss": 0.7181,
820
+ "step": 1090
821
+ },
822
+ {
823
+ "epoch": 5.0,
824
+ "eval_accuracy": 0.8923327895595432,
825
+ "eval_f1": 0.8095838184642776,
826
+ "eval_loss": 0.2933848798274994,
827
+ "eval_precision": 0.8060491329782299,
828
+ "eval_recall": 0.8389422669715082,
829
+ "eval_runtime": 20.1372,
830
+ "eval_samples_per_second": 121.765,
831
+ "eval_steps_per_second": 7.648,
832
+ "step": 1090
833
+ },
834
+ {
835
+ "epoch": 5.05,
836
+ "grad_norm": 1.0849229097366333,
837
+ "learning_rate": 0.0024770642201834866,
838
+ "loss": 0.6191,
839
+ "step": 1100
840
+ },
841
+ {
842
+ "epoch": 5.09,
843
+ "grad_norm": 1.3102843761444092,
844
+ "learning_rate": 0.0024541284403669724,
845
+ "loss": 0.6732,
846
+ "step": 1110
847
+ },
848
+ {
849
+ "epoch": 5.14,
850
+ "grad_norm": 1.0374155044555664,
851
+ "learning_rate": 0.002431192660550459,
852
+ "loss": 0.654,
853
+ "step": 1120
854
+ },
855
+ {
856
+ "epoch": 5.18,
857
+ "grad_norm": 2.718107223510742,
858
+ "learning_rate": 0.002408256880733945,
859
+ "loss": 0.5944,
860
+ "step": 1130
861
+ },
862
+ {
863
+ "epoch": 5.23,
864
+ "grad_norm": 1.9114854335784912,
865
+ "learning_rate": 0.0023853211009174312,
866
+ "loss": 0.6039,
867
+ "step": 1140
868
+ },
869
+ {
870
+ "epoch": 5.28,
871
+ "grad_norm": 1.1414576768875122,
872
+ "learning_rate": 0.0023623853211009174,
873
+ "loss": 0.6064,
874
+ "step": 1150
875
+ },
876
+ {
877
+ "epoch": 5.32,
878
+ "grad_norm": 1.319360613822937,
879
+ "learning_rate": 0.002339449541284404,
880
+ "loss": 0.6335,
881
+ "step": 1160
882
+ },
883
+ {
884
+ "epoch": 5.37,
885
+ "grad_norm": 1.377007246017456,
886
+ "learning_rate": 0.00231651376146789,
887
+ "loss": 0.6431,
888
+ "step": 1170
889
+ },
890
+ {
891
+ "epoch": 5.41,
892
+ "grad_norm": 1.3753886222839355,
893
+ "learning_rate": 0.0022935779816513763,
894
+ "loss": 0.6594,
895
+ "step": 1180
896
+ },
897
+ {
898
+ "epoch": 5.46,
899
+ "grad_norm": 1.0963853597640991,
900
+ "learning_rate": 0.0022706422018348624,
901
+ "loss": 0.6999,
902
+ "step": 1190
903
+ },
904
+ {
905
+ "epoch": 5.5,
906
+ "grad_norm": 1.3119159936904907,
907
+ "learning_rate": 0.002247706422018349,
908
+ "loss": 0.5879,
909
+ "step": 1200
910
+ },
911
+ {
912
+ "epoch": 5.55,
913
+ "grad_norm": 1.000196099281311,
914
+ "learning_rate": 0.0022247706422018347,
915
+ "loss": 0.6559,
916
+ "step": 1210
917
+ },
918
+ {
919
+ "epoch": 5.6,
920
+ "grad_norm": 1.1916228532791138,
921
+ "learning_rate": 0.0022018348623853213,
922
+ "loss": 0.6322,
923
+ "step": 1220
924
+ },
925
+ {
926
+ "epoch": 5.64,
927
+ "grad_norm": 1.3752835988998413,
928
+ "learning_rate": 0.0021788990825688075,
929
+ "loss": 0.6684,
930
+ "step": 1230
931
+ },
932
+ {
933
+ "epoch": 5.69,
934
+ "grad_norm": 1.3724400997161865,
935
+ "learning_rate": 0.0021559633027522936,
936
+ "loss": 0.6126,
937
+ "step": 1240
938
+ },
939
+ {
940
+ "epoch": 5.73,
941
+ "grad_norm": 1.1628080606460571,
942
+ "learning_rate": 0.0021330275229357798,
943
+ "loss": 0.6358,
944
+ "step": 1250
945
+ },
946
+ {
947
+ "epoch": 5.78,
948
+ "grad_norm": 1.222913384437561,
949
+ "learning_rate": 0.0021100917431192663,
950
+ "loss": 0.6124,
951
+ "step": 1260
952
+ },
953
+ {
954
+ "epoch": 5.83,
955
+ "grad_norm": 1.2353663444519043,
956
+ "learning_rate": 0.0020871559633027525,
957
+ "loss": 0.688,
958
+ "step": 1270
959
+ },
960
+ {
961
+ "epoch": 5.87,
962
+ "grad_norm": 1.0336887836456299,
963
+ "learning_rate": 0.0020642201834862386,
964
+ "loss": 0.6118,
965
+ "step": 1280
966
+ },
967
+ {
968
+ "epoch": 5.92,
969
+ "grad_norm": 0.934916079044342,
970
+ "learning_rate": 0.002041284403669725,
971
+ "loss": 0.6061,
972
+ "step": 1290
973
+ },
974
+ {
975
+ "epoch": 5.96,
976
+ "grad_norm": 1.282327651977539,
977
+ "learning_rate": 0.002018348623853211,
978
+ "loss": 0.5857,
979
+ "step": 1300
980
+ },
981
+ {
982
+ "epoch": 6.0,
983
+ "eval_accuracy": 0.8858075040783034,
984
+ "eval_f1": 0.8314901516993909,
985
+ "eval_loss": 0.2926943302154541,
986
+ "eval_precision": 0.8493368689933978,
987
+ "eval_recall": 0.8358247845047836,
988
+ "eval_runtime": 20.132,
989
+ "eval_samples_per_second": 121.796,
990
+ "eval_steps_per_second": 7.65,
991
+ "step": 1308
992
+ },
993
+ {
994
+ "epoch": 6.01,
995
+ "grad_norm": 1.3109623193740845,
996
+ "learning_rate": 0.001995412844036697,
997
+ "loss": 0.6379,
998
+ "step": 1310
999
+ },
1000
+ {
1001
+ "epoch": 6.06,
1002
+ "grad_norm": 1.0487977266311646,
1003
+ "learning_rate": 0.0019724770642201837,
1004
+ "loss": 0.6469,
1005
+ "step": 1320
1006
+ },
1007
+ {
1008
+ "epoch": 6.1,
1009
+ "grad_norm": 1.1113476753234863,
1010
+ "learning_rate": 0.0019495412844036698,
1011
+ "loss": 0.6182,
1012
+ "step": 1330
1013
+ },
1014
+ {
1015
+ "epoch": 6.15,
1016
+ "grad_norm": 1.2381951808929443,
1017
+ "learning_rate": 0.0019266055045871562,
1018
+ "loss": 0.6625,
1019
+ "step": 1340
1020
+ },
1021
+ {
1022
+ "epoch": 6.19,
1023
+ "grad_norm": 1.175887107849121,
1024
+ "learning_rate": 0.0019036697247706421,
1025
+ "loss": 0.6267,
1026
+ "step": 1350
1027
+ },
1028
+ {
1029
+ "epoch": 6.24,
1030
+ "grad_norm": 0.798713743686676,
1031
+ "learning_rate": 0.0018807339449541285,
1032
+ "loss": 0.5674,
1033
+ "step": 1360
1034
+ },
1035
+ {
1036
+ "epoch": 6.28,
1037
+ "grad_norm": 0.9393543004989624,
1038
+ "learning_rate": 0.0018577981651376147,
1039
+ "loss": 0.5958,
1040
+ "step": 1370
1041
+ },
1042
+ {
1043
+ "epoch": 6.33,
1044
+ "grad_norm": 1.7909319400787354,
1045
+ "learning_rate": 0.001834862385321101,
1046
+ "loss": 0.5627,
1047
+ "step": 1380
1048
+ },
1049
+ {
1050
+ "epoch": 6.38,
1051
+ "grad_norm": 1.0835124254226685,
1052
+ "learning_rate": 0.0018119266055045872,
1053
+ "loss": 0.5894,
1054
+ "step": 1390
1055
+ },
1056
+ {
1057
+ "epoch": 6.42,
1058
+ "grad_norm": 1.349327802658081,
1059
+ "learning_rate": 0.0017889908256880735,
1060
+ "loss": 0.5777,
1061
+ "step": 1400
1062
+ },
1063
+ {
1064
+ "epoch": 6.47,
1065
+ "grad_norm": 0.8337296843528748,
1066
+ "learning_rate": 0.0017660550458715597,
1067
+ "loss": 0.5706,
1068
+ "step": 1410
1069
+ },
1070
+ {
1071
+ "epoch": 6.51,
1072
+ "grad_norm": 1.2801483869552612,
1073
+ "learning_rate": 0.001743119266055046,
1074
+ "loss": 0.5262,
1075
+ "step": 1420
1076
+ },
1077
+ {
1078
+ "epoch": 6.56,
1079
+ "grad_norm": 1.4153425693511963,
1080
+ "learning_rate": 0.0017201834862385322,
1081
+ "loss": 0.5377,
1082
+ "step": 1430
1083
+ },
1084
+ {
1085
+ "epoch": 6.61,
1086
+ "grad_norm": 1.23189377784729,
1087
+ "learning_rate": 0.0016972477064220186,
1088
+ "loss": 0.6018,
1089
+ "step": 1440
1090
+ },
1091
+ {
1092
+ "epoch": 6.65,
1093
+ "grad_norm": 1.4015734195709229,
1094
+ "learning_rate": 0.0016743119266055045,
1095
+ "loss": 0.5903,
1096
+ "step": 1450
1097
+ },
1098
+ {
1099
+ "epoch": 6.7,
1100
+ "grad_norm": 1.4871268272399902,
1101
+ "learning_rate": 0.0016513761467889909,
1102
+ "loss": 0.562,
1103
+ "step": 1460
1104
+ },
1105
+ {
1106
+ "epoch": 6.74,
1107
+ "grad_norm": 1.0915515422821045,
1108
+ "learning_rate": 0.001628440366972477,
1109
+ "loss": 0.552,
1110
+ "step": 1470
1111
+ },
1112
+ {
1113
+ "epoch": 6.79,
1114
+ "grad_norm": 1.5078574419021606,
1115
+ "learning_rate": 0.0016055045871559634,
1116
+ "loss": 0.4712,
1117
+ "step": 1480
1118
+ },
1119
+ {
1120
+ "epoch": 6.83,
1121
+ "grad_norm": 1.771911859512329,
1122
+ "learning_rate": 0.0015825688073394495,
1123
+ "loss": 0.5985,
1124
+ "step": 1490
1125
+ },
1126
+ {
1127
+ "epoch": 6.88,
1128
+ "grad_norm": 1.1326086521148682,
1129
+ "learning_rate": 0.001559633027522936,
1130
+ "loss": 0.6038,
1131
+ "step": 1500
1132
+ },
1133
+ {
1134
+ "epoch": 6.93,
1135
+ "grad_norm": 0.9454457759857178,
1136
+ "learning_rate": 0.001536697247706422,
1137
+ "loss": 0.5471,
1138
+ "step": 1510
1139
+ },
1140
+ {
1141
+ "epoch": 6.97,
1142
+ "grad_norm": 1.146789312362671,
1143
+ "learning_rate": 0.0015137614678899084,
1144
+ "loss": 0.5607,
1145
+ "step": 1520
1146
+ },
1147
+ {
1148
+ "epoch": 7.0,
1149
+ "eval_accuracy": 0.9061990212071778,
1150
+ "eval_f1": 0.8415533276455389,
1151
+ "eval_loss": 0.22090552747249603,
1152
+ "eval_precision": 0.8658073048818948,
1153
+ "eval_recall": 0.854663440247606,
1154
+ "eval_runtime": 20.17,
1155
+ "eval_samples_per_second": 121.567,
1156
+ "eval_steps_per_second": 7.635,
1157
+ "step": 1526
1158
+ },
1159
+ {
1160
+ "epoch": 7.02,
1161
+ "grad_norm": 1.0613855123519897,
1162
+ "learning_rate": 0.0014908256880733946,
1163
+ "loss": 0.5094,
1164
+ "step": 1530
1165
+ },
1166
+ {
1167
+ "epoch": 7.06,
1168
+ "grad_norm": 0.9999263286590576,
1169
+ "learning_rate": 0.001467889908256881,
1170
+ "loss": 0.5519,
1171
+ "step": 1540
1172
+ },
1173
+ {
1174
+ "epoch": 7.11,
1175
+ "grad_norm": 1.2755348682403564,
1176
+ "learning_rate": 0.0014449541284403669,
1177
+ "loss": 0.5626,
1178
+ "step": 1550
1179
+ },
1180
+ {
1181
+ "epoch": 7.16,
1182
+ "grad_norm": 1.0304380655288696,
1183
+ "learning_rate": 0.0014220183486238532,
1184
+ "loss": 0.5547,
1185
+ "step": 1560
1186
+ },
1187
+ {
1188
+ "epoch": 7.2,
1189
+ "grad_norm": 1.4103654623031616,
1190
+ "learning_rate": 0.0013990825688073394,
1191
+ "loss": 0.5487,
1192
+ "step": 1570
1193
+ },
1194
+ {
1195
+ "epoch": 7.25,
1196
+ "grad_norm": 0.967018723487854,
1197
+ "learning_rate": 0.0013761467889908258,
1198
+ "loss": 0.5428,
1199
+ "step": 1580
1200
+ },
1201
+ {
1202
+ "epoch": 7.29,
1203
+ "grad_norm": 1.2113174200057983,
1204
+ "learning_rate": 0.001353211009174312,
1205
+ "loss": 0.5207,
1206
+ "step": 1590
1207
+ },
1208
+ {
1209
+ "epoch": 7.34,
1210
+ "grad_norm": 1.2477692365646362,
1211
+ "learning_rate": 0.0013302752293577983,
1212
+ "loss": 0.5647,
1213
+ "step": 1600
1214
+ },
1215
+ {
1216
+ "epoch": 7.39,
1217
+ "grad_norm": 0.9783982038497925,
1218
+ "learning_rate": 0.0013073394495412844,
1219
+ "loss": 0.5262,
1220
+ "step": 1610
1221
+ },
1222
+ {
1223
+ "epoch": 7.43,
1224
+ "grad_norm": 1.3188928365707397,
1225
+ "learning_rate": 0.0012844036697247708,
1226
+ "loss": 0.5012,
1227
+ "step": 1620
1228
+ },
1229
+ {
1230
+ "epoch": 7.48,
1231
+ "grad_norm": 1.1862777471542358,
1232
+ "learning_rate": 0.0012614678899082567,
1233
+ "loss": 0.5135,
1234
+ "step": 1630
1235
+ },
1236
+ {
1237
+ "epoch": 7.52,
1238
+ "grad_norm": 0.9528157114982605,
1239
+ "learning_rate": 0.0012385321100917433,
1240
+ "loss": 0.554,
1241
+ "step": 1640
1242
+ },
1243
+ {
1244
+ "epoch": 7.57,
1245
+ "grad_norm": 1.229379653930664,
1246
+ "learning_rate": 0.0012155963302752295,
1247
+ "loss": 0.4834,
1248
+ "step": 1650
1249
+ },
1250
+ {
1251
+ "epoch": 7.61,
1252
+ "grad_norm": 1.2559857368469238,
1253
+ "learning_rate": 0.0011926605504587156,
1254
+ "loss": 0.5633,
1255
+ "step": 1660
1256
+ },
1257
+ {
1258
+ "epoch": 7.66,
1259
+ "grad_norm": 1.423509120941162,
1260
+ "learning_rate": 0.001169724770642202,
1261
+ "loss": 0.5654,
1262
+ "step": 1670
1263
+ },
1264
+ {
1265
+ "epoch": 7.71,
1266
+ "grad_norm": 1.0073614120483398,
1267
+ "learning_rate": 0.0011467889908256881,
1268
+ "loss": 0.555,
1269
+ "step": 1680
1270
+ },
1271
+ {
1272
+ "epoch": 7.75,
1273
+ "grad_norm": 0.8332647085189819,
1274
+ "learning_rate": 0.0011238532110091745,
1275
+ "loss": 0.5221,
1276
+ "step": 1690
1277
+ },
1278
+ {
1279
+ "epoch": 7.8,
1280
+ "grad_norm": 1.2242189645767212,
1281
+ "learning_rate": 0.0011009174311926607,
1282
+ "loss": 0.5209,
1283
+ "step": 1700
1284
+ },
1285
+ {
1286
+ "epoch": 7.84,
1287
+ "grad_norm": 1.2133524417877197,
1288
+ "learning_rate": 0.0010779816513761468,
1289
+ "loss": 0.4905,
1290
+ "step": 1710
1291
+ },
1292
+ {
1293
+ "epoch": 7.89,
1294
+ "grad_norm": 1.3106974363327026,
1295
+ "learning_rate": 0.0010550458715596332,
1296
+ "loss": 0.5018,
1297
+ "step": 1720
1298
+ },
1299
+ {
1300
+ "epoch": 7.94,
1301
+ "grad_norm": 1.1411136388778687,
1302
+ "learning_rate": 0.0010321100917431193,
1303
+ "loss": 0.5594,
1304
+ "step": 1730
1305
+ },
1306
+ {
1307
+ "epoch": 7.98,
1308
+ "grad_norm": 1.3002750873565674,
1309
+ "learning_rate": 0.0010091743119266055,
1310
+ "loss": 0.5423,
1311
+ "step": 1740
1312
+ },
1313
+ {
1314
+ "epoch": 8.0,
1315
+ "eval_accuracy": 0.9025285481239804,
1316
+ "eval_f1": 0.8487271486984757,
1317
+ "eval_loss": 0.2513488829135895,
1318
+ "eval_precision": 0.854490909662593,
1319
+ "eval_recall": 0.847020693577074,
1320
+ "eval_runtime": 20.1419,
1321
+ "eval_samples_per_second": 121.736,
1322
+ "eval_steps_per_second": 7.646,
1323
+ "step": 1744
1324
+ },
1325
+ {
1326
+ "epoch": 8.03,
1327
+ "grad_norm": 0.9419348835945129,
1328
+ "learning_rate": 0.0009862385321100918,
1329
+ "loss": 0.4677,
1330
+ "step": 1750
1331
+ },
1332
+ {
1333
+ "epoch": 8.07,
1334
+ "grad_norm": 1.2686134576797485,
1335
+ "learning_rate": 0.0009633027522935781,
1336
+ "loss": 0.5112,
1337
+ "step": 1760
1338
+ },
1339
+ {
1340
+ "epoch": 8.12,
1341
+ "grad_norm": 1.0132619142532349,
1342
+ "learning_rate": 0.0009403669724770643,
1343
+ "loss": 0.4776,
1344
+ "step": 1770
1345
+ },
1346
+ {
1347
+ "epoch": 8.17,
1348
+ "grad_norm": 1.5143158435821533,
1349
+ "learning_rate": 0.0009174311926605505,
1350
+ "loss": 0.4824,
1351
+ "step": 1780
1352
+ },
1353
+ {
1354
+ "epoch": 8.21,
1355
+ "grad_norm": 0.9703628420829773,
1356
+ "learning_rate": 0.0008944954128440368,
1357
+ "loss": 0.5088,
1358
+ "step": 1790
1359
+ },
1360
+ {
1361
+ "epoch": 8.26,
1362
+ "grad_norm": 1.054370403289795,
1363
+ "learning_rate": 0.000871559633027523,
1364
+ "loss": 0.5356,
1365
+ "step": 1800
1366
+ },
1367
+ {
1368
+ "epoch": 8.3,
1369
+ "grad_norm": 1.4087867736816406,
1370
+ "learning_rate": 0.0008486238532110093,
1371
+ "loss": 0.4931,
1372
+ "step": 1810
1373
+ },
1374
+ {
1375
+ "epoch": 8.35,
1376
+ "grad_norm": 1.2010319232940674,
1377
+ "learning_rate": 0.0008256880733944954,
1378
+ "loss": 0.457,
1379
+ "step": 1820
1380
+ },
1381
+ {
1382
+ "epoch": 8.39,
1383
+ "grad_norm": 0.9890044927597046,
1384
+ "learning_rate": 0.0008027522935779817,
1385
+ "loss": 0.4841,
1386
+ "step": 1830
1387
+ },
1388
+ {
1389
+ "epoch": 8.44,
1390
+ "grad_norm": 1.102015733718872,
1391
+ "learning_rate": 0.000779816513761468,
1392
+ "loss": 0.5156,
1393
+ "step": 1840
1394
+ },
1395
+ {
1396
+ "epoch": 8.49,
1397
+ "grad_norm": 1.8185703754425049,
1398
+ "learning_rate": 0.0007568807339449542,
1399
+ "loss": 0.5208,
1400
+ "step": 1850
1401
+ },
1402
+ {
1403
+ "epoch": 8.53,
1404
+ "grad_norm": 1.0316917896270752,
1405
+ "learning_rate": 0.0007339449541284405,
1406
+ "loss": 0.4669,
1407
+ "step": 1860
1408
+ },
1409
+ {
1410
+ "epoch": 8.58,
1411
+ "grad_norm": 1.3456588983535767,
1412
+ "learning_rate": 0.0007110091743119266,
1413
+ "loss": 0.5149,
1414
+ "step": 1870
1415
+ },
1416
+ {
1417
+ "epoch": 8.62,
1418
+ "grad_norm": 1.1594740152359009,
1419
+ "learning_rate": 0.0006880733944954129,
1420
+ "loss": 0.4816,
1421
+ "step": 1880
1422
+ },
1423
+ {
1424
+ "epoch": 8.67,
1425
+ "grad_norm": 1.195693850517273,
1426
+ "learning_rate": 0.0006651376146788991,
1427
+ "loss": 0.4264,
1428
+ "step": 1890
1429
+ },
1430
+ {
1431
+ "epoch": 8.72,
1432
+ "grad_norm": 0.8687453269958496,
1433
+ "learning_rate": 0.0006422018348623854,
1434
+ "loss": 0.5055,
1435
+ "step": 1900
1436
+ },
1437
+ {
1438
+ "epoch": 8.76,
1439
+ "grad_norm": 1.019943118095398,
1440
+ "learning_rate": 0.0006192660550458717,
1441
+ "loss": 0.4862,
1442
+ "step": 1910
1443
+ },
1444
+ {
1445
+ "epoch": 8.81,
1446
+ "grad_norm": 1.0585637092590332,
1447
+ "learning_rate": 0.0005963302752293578,
1448
+ "loss": 0.4698,
1449
+ "step": 1920
1450
+ },
1451
+ {
1452
+ "epoch": 8.85,
1453
+ "grad_norm": 1.1640921831130981,
1454
+ "learning_rate": 0.0005733944954128441,
1455
+ "loss": 0.4725,
1456
+ "step": 1930
1457
+ },
1458
+ {
1459
+ "epoch": 8.9,
1460
+ "grad_norm": 1.0359253883361816,
1461
+ "learning_rate": 0.0005504587155963303,
1462
+ "loss": 0.4677,
1463
+ "step": 1940
1464
+ },
1465
+ {
1466
+ "epoch": 8.94,
1467
+ "grad_norm": 0.795647382736206,
1468
+ "learning_rate": 0.0005275229357798166,
1469
+ "loss": 0.4597,
1470
+ "step": 1950
1471
+ },
1472
+ {
1473
+ "epoch": 8.99,
1474
+ "grad_norm": 0.8944999575614929,
1475
+ "learning_rate": 0.0005045871559633027,
1476
+ "loss": 0.4053,
1477
+ "step": 1960
1478
+ },
1479
+ {
1480
+ "epoch": 9.0,
1481
+ "eval_accuracy": 0.9037520391517129,
1482
+ "eval_f1": 0.8372664360619215,
1483
+ "eval_loss": 0.256144642829895,
1484
+ "eval_precision": 0.8543098842069452,
1485
+ "eval_recall": 0.8456765717856151,
1486
+ "eval_runtime": 20.1301,
1487
+ "eval_samples_per_second": 121.808,
1488
+ "eval_steps_per_second": 7.65,
1489
+ "step": 1962
1490
+ },
1491
+ {
1492
+ "epoch": 9.04,
1493
+ "grad_norm": 0.7914834022521973,
1494
+ "learning_rate": 0.00048165137614678905,
1495
+ "loss": 0.5022,
1496
+ "step": 1970
1497
+ },
1498
+ {
1499
+ "epoch": 9.08,
1500
+ "grad_norm": 1.254602313041687,
1501
+ "learning_rate": 0.00045871559633027525,
1502
+ "loss": 0.4237,
1503
+ "step": 1980
1504
+ },
1505
+ {
1506
+ "epoch": 9.13,
1507
+ "grad_norm": 1.0266820192337036,
1508
+ "learning_rate": 0.0004357798165137615,
1509
+ "loss": 0.4579,
1510
+ "step": 1990
1511
+ },
1512
+ {
1513
+ "epoch": 9.17,
1514
+ "grad_norm": 0.948698103427887,
1515
+ "learning_rate": 0.0004128440366972477,
1516
+ "loss": 0.4518,
1517
+ "step": 2000
1518
+ },
1519
+ {
1520
+ "epoch": 9.22,
1521
+ "grad_norm": 1.4235280752182007,
1522
+ "learning_rate": 0.000389908256880734,
1523
+ "loss": 0.4189,
1524
+ "step": 2010
1525
+ },
1526
+ {
1527
+ "epoch": 9.27,
1528
+ "grad_norm": 1.2883589267730713,
1529
+ "learning_rate": 0.00036697247706422024,
1530
+ "loss": 0.4441,
1531
+ "step": 2020
1532
+ },
1533
+ {
1534
+ "epoch": 9.31,
1535
+ "grad_norm": 1.0829825401306152,
1536
+ "learning_rate": 0.00034403669724770644,
1537
+ "loss": 0.3735,
1538
+ "step": 2030
1539
+ },
1540
+ {
1541
+ "epoch": 9.36,
1542
+ "grad_norm": 1.0987470149993896,
1543
+ "learning_rate": 0.0003211009174311927,
1544
+ "loss": 0.4356,
1545
+ "step": 2040
1546
+ },
1547
+ {
1548
+ "epoch": 9.4,
1549
+ "grad_norm": 1.0250358581542969,
1550
+ "learning_rate": 0.0002981651376146789,
1551
+ "loss": 0.4606,
1552
+ "step": 2050
1553
+ },
1554
+ {
1555
+ "epoch": 9.45,
1556
+ "grad_norm": 0.8266171216964722,
1557
+ "learning_rate": 0.00027522935779816516,
1558
+ "loss": 0.4216,
1559
+ "step": 2060
1560
+ },
1561
+ {
1562
+ "epoch": 9.5,
1563
+ "grad_norm": 0.7694286108016968,
1564
+ "learning_rate": 0.00025229357798165137,
1565
+ "loss": 0.4054,
1566
+ "step": 2070
1567
+ },
1568
+ {
1569
+ "epoch": 9.54,
1570
+ "grad_norm": 1.0557419061660767,
1571
+ "learning_rate": 0.00022935779816513763,
1572
+ "loss": 0.4145,
1573
+ "step": 2080
1574
+ },
1575
+ {
1576
+ "epoch": 9.59,
1577
+ "grad_norm": 1.1996885538101196,
1578
+ "learning_rate": 0.00020871559633027525,
1579
+ "loss": 0.4446,
1580
+ "step": 2090
1581
+ },
1582
+ {
1583
+ "epoch": 9.63,
1584
+ "grad_norm": 0.971155047416687,
1585
+ "learning_rate": 0.00018577981651376148,
1586
+ "loss": 0.4329,
1587
+ "step": 2100
1588
+ },
1589
+ {
1590
+ "epoch": 9.68,
1591
+ "grad_norm": 1.243017554283142,
1592
+ "learning_rate": 0.0001628440366972477,
1593
+ "loss": 0.386,
1594
+ "step": 2110
1595
+ },
1596
+ {
1597
+ "epoch": 9.72,
1598
+ "grad_norm": 0.9568091034889221,
1599
+ "learning_rate": 0.00013990825688073395,
1600
+ "loss": 0.4192,
1601
+ "step": 2120
1602
+ },
1603
+ {
1604
+ "epoch": 9.77,
1605
+ "grad_norm": 0.8637120723724365,
1606
+ "learning_rate": 0.00011697247706422019,
1607
+ "loss": 0.414,
1608
+ "step": 2130
1609
+ },
1610
+ {
1611
+ "epoch": 9.82,
1612
+ "grad_norm": 1.4437438249588013,
1613
+ "learning_rate": 9.403669724770644e-05,
1614
+ "loss": 0.4716,
1615
+ "step": 2140
1616
+ },
1617
+ {
1618
+ "epoch": 9.86,
1619
+ "grad_norm": 0.9072945713996887,
1620
+ "learning_rate": 7.110091743119267e-05,
1621
+ "loss": 0.4339,
1622
+ "step": 2150
1623
+ },
1624
+ {
1625
+ "epoch": 9.91,
1626
+ "grad_norm": 1.0016337633132935,
1627
+ "learning_rate": 4.81651376146789e-05,
1628
+ "loss": 0.4118,
1629
+ "step": 2160
1630
+ },
1631
+ {
1632
+ "epoch": 9.95,
1633
+ "grad_norm": 1.1547799110412598,
1634
+ "learning_rate": 2.5229357798165138e-05,
1635
+ "loss": 0.4367,
1636
+ "step": 2170
1637
+ },
1638
+ {
1639
+ "epoch": 10.0,
1640
+ "grad_norm": 1.1134579181671143,
1641
+ "learning_rate": 2.2935779816513764e-06,
1642
+ "loss": 0.4417,
1643
+ "step": 2180
1644
+ },
1645
+ {
1646
+ "epoch": 10.0,
1647
+ "eval_accuracy": 0.899673735725938,
1648
+ "eval_f1": 0.8415790365990568,
1649
+ "eval_loss": 0.2557845115661621,
1650
+ "eval_precision": 0.8463460685420455,
1651
+ "eval_recall": 0.8395151187215174,
1652
+ "eval_runtime": 20.1334,
1653
+ "eval_samples_per_second": 121.788,
1654
+ "eval_steps_per_second": 7.649,
1655
+ "step": 2180
1656
+ },
1657
+ {
1658
+ "epoch": 10.0,
1659
+ "step": 2180,
1660
+ "total_flos": 1.0878579515820442e+19,
1661
+ "train_loss": 0.69453261346992,
1662
+ "train_runtime": 2383.9912,
1663
+ "train_samples_per_second": 58.473,
1664
+ "train_steps_per_second": 0.914
1665
+ }
1666
+ ],
1667
+ "logging_steps": 10,
1668
+ "max_steps": 2180,
1669
+ "num_input_tokens_seen": 0,
1670
+ "num_train_epochs": 10,
1671
+ "save_steps": 500,
1672
+ "total_flos": 1.0878579515820442e+19,
1673
+ "train_batch_size": 16,
1674
+ "trial_name": null,
1675
+ "trial_params": null
1676
+ }
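
trainer_state.json above logs one training entry every 10 steps plus an evaluation block per epoch; its best_metric (eval_accuracy ≈ 0.9062) points at checkpoint-1526. A short sketch for pulling the per-epoch evaluation metrics out of log_history, assuming the file has been downloaded locally:

```python
# Sketch: extract per-epoch evaluation metrics from trainer_state.json.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

eval_rows = [e for e in state["log_history"] if "eval_accuracy" in e]
for row in eval_rows:
    print(f"epoch {row['epoch']:>4}: "
          f"acc={row['eval_accuracy']:.4f}  f1={row['eval_f1']:.4f}")

print("best:", state["best_metric"], "at", state["best_model_checkpoint"])
```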