swin-tiny-finetuned-cifar100 / trainer_state.json
Mazen Amria
Training in progress, epoch 5
a0f9489
raw
history blame
96.3 kB
{
"best_metric": 0.8735,
"best_model_checkpoint": "swin-tiny-finetuned-cifar100/checkpoint-3905",
"epoch": 4.99968,
"global_step": 3905,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 5.115089514066497e-07,
"loss": 4.6445,
"step": 5
},
{
"epoch": 0.01,
"learning_rate": 1.0230179028132994e-06,
"loss": 4.6363,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 1.534526854219949e-06,
"loss": 4.6361,
"step": 15
},
{
"epoch": 0.03,
"learning_rate": 2.0460358056265987e-06,
"loss": 4.6511,
"step": 20
},
{
"epoch": 0.03,
"learning_rate": 2.5575447570332483e-06,
"loss": 4.6596,
"step": 25
},
{
"epoch": 0.04,
"learning_rate": 3.069053708439898e-06,
"loss": 4.6345,
"step": 30
},
{
"epoch": 0.04,
"learning_rate": 3.5805626598465474e-06,
"loss": 4.6177,
"step": 35
},
{
"epoch": 0.05,
"learning_rate": 4.092071611253197e-06,
"loss": 4.6046,
"step": 40
},
{
"epoch": 0.06,
"learning_rate": 4.603580562659847e-06,
"loss": 4.6372,
"step": 45
},
{
"epoch": 0.06,
"learning_rate": 5.1150895140664966e-06,
"loss": 4.5815,
"step": 50
},
{
"epoch": 0.07,
"learning_rate": 5.626598465473146e-06,
"loss": 4.5585,
"step": 55
},
{
"epoch": 0.08,
"learning_rate": 6.138107416879796e-06,
"loss": 4.6204,
"step": 60
},
{
"epoch": 0.08,
"learning_rate": 6.649616368286445e-06,
"loss": 4.5496,
"step": 65
},
{
"epoch": 0.09,
"learning_rate": 7.161125319693095e-06,
"loss": 4.5638,
"step": 70
},
{
"epoch": 0.1,
"learning_rate": 7.672634271099745e-06,
"loss": 4.5451,
"step": 75
},
{
"epoch": 0.1,
"learning_rate": 8.184143222506395e-06,
"loss": 4.5389,
"step": 80
},
{
"epoch": 0.11,
"learning_rate": 8.695652173913044e-06,
"loss": 4.4942,
"step": 85
},
{
"epoch": 0.12,
"learning_rate": 9.207161125319694e-06,
"loss": 4.4929,
"step": 90
},
{
"epoch": 0.12,
"learning_rate": 9.718670076726344e-06,
"loss": 4.4607,
"step": 95
},
{
"epoch": 0.13,
"learning_rate": 1.0230179028132993e-05,
"loss": 4.4665,
"step": 100
},
{
"epoch": 0.13,
"learning_rate": 1.0741687979539643e-05,
"loss": 4.438,
"step": 105
},
{
"epoch": 0.14,
"learning_rate": 1.1253196930946292e-05,
"loss": 4.415,
"step": 110
},
{
"epoch": 0.15,
"learning_rate": 1.1764705882352942e-05,
"loss": 4.3831,
"step": 115
},
{
"epoch": 0.15,
"learning_rate": 1.2276214833759591e-05,
"loss": 4.3877,
"step": 120
},
{
"epoch": 0.16,
"learning_rate": 1.2787723785166241e-05,
"loss": 4.3453,
"step": 125
},
{
"epoch": 0.17,
"learning_rate": 1.329923273657289e-05,
"loss": 4.3025,
"step": 130
},
{
"epoch": 0.17,
"learning_rate": 1.381074168797954e-05,
"loss": 4.2795,
"step": 135
},
{
"epoch": 0.18,
"learning_rate": 1.432225063938619e-05,
"loss": 4.2353,
"step": 140
},
{
"epoch": 0.19,
"learning_rate": 1.483375959079284e-05,
"loss": 4.1786,
"step": 145
},
{
"epoch": 0.19,
"learning_rate": 1.534526854219949e-05,
"loss": 4.1939,
"step": 150
},
{
"epoch": 0.2,
"learning_rate": 1.585677749360614e-05,
"loss": 4.0723,
"step": 155
},
{
"epoch": 0.2,
"learning_rate": 1.636828644501279e-05,
"loss": 4.0102,
"step": 160
},
{
"epoch": 0.21,
"learning_rate": 1.687979539641944e-05,
"loss": 3.9292,
"step": 165
},
{
"epoch": 0.22,
"learning_rate": 1.739130434782609e-05,
"loss": 3.8505,
"step": 170
},
{
"epoch": 0.22,
"learning_rate": 1.790281329923274e-05,
"loss": 3.7539,
"step": 175
},
{
"epoch": 0.23,
"learning_rate": 1.8414322250639388e-05,
"loss": 3.6833,
"step": 180
},
{
"epoch": 0.24,
"learning_rate": 1.8925831202046038e-05,
"loss": 3.5281,
"step": 185
},
{
"epoch": 0.24,
"learning_rate": 1.9437340153452687e-05,
"loss": 3.4329,
"step": 190
},
{
"epoch": 0.25,
"learning_rate": 1.9948849104859337e-05,
"loss": 3.3203,
"step": 195
},
{
"epoch": 0.26,
"learning_rate": 2.0460358056265986e-05,
"loss": 3.1954,
"step": 200
},
{
"epoch": 0.26,
"learning_rate": 2.0971867007672636e-05,
"loss": 3.0686,
"step": 205
},
{
"epoch": 0.27,
"learning_rate": 2.1483375959079285e-05,
"loss": 2.9424,
"step": 210
},
{
"epoch": 0.28,
"learning_rate": 2.1994884910485935e-05,
"loss": 3.0176,
"step": 215
},
{
"epoch": 0.28,
"learning_rate": 2.2506393861892585e-05,
"loss": 2.8912,
"step": 220
},
{
"epoch": 0.29,
"learning_rate": 2.3017902813299234e-05,
"loss": 2.7802,
"step": 225
},
{
"epoch": 0.29,
"learning_rate": 2.3529411764705884e-05,
"loss": 2.7013,
"step": 230
},
{
"epoch": 0.3,
"learning_rate": 2.4040920716112533e-05,
"loss": 2.6416,
"step": 235
},
{
"epoch": 0.31,
"learning_rate": 2.4552429667519183e-05,
"loss": 2.5482,
"step": 240
},
{
"epoch": 0.31,
"learning_rate": 2.5063938618925832e-05,
"loss": 2.3947,
"step": 245
},
{
"epoch": 0.32,
"learning_rate": 2.5575447570332482e-05,
"loss": 2.2859,
"step": 250
},
{
"epoch": 0.33,
"learning_rate": 2.608695652173913e-05,
"loss": 2.2609,
"step": 255
},
{
"epoch": 0.33,
"learning_rate": 2.659846547314578e-05,
"loss": 2.2065,
"step": 260
},
{
"epoch": 0.34,
"learning_rate": 2.710997442455243e-05,
"loss": 2.1006,
"step": 265
},
{
"epoch": 0.35,
"learning_rate": 2.762148337595908e-05,
"loss": 1.9752,
"step": 270
},
{
"epoch": 0.35,
"learning_rate": 2.813299232736573e-05,
"loss": 2.0412,
"step": 275
},
{
"epoch": 0.36,
"learning_rate": 2.864450127877238e-05,
"loss": 2.0338,
"step": 280
},
{
"epoch": 0.36,
"learning_rate": 2.915601023017903e-05,
"loss": 1.8349,
"step": 285
},
{
"epoch": 0.37,
"learning_rate": 2.966751918158568e-05,
"loss": 1.7146,
"step": 290
},
{
"epoch": 0.38,
"learning_rate": 3.0179028132992328e-05,
"loss": 1.7222,
"step": 295
},
{
"epoch": 0.38,
"learning_rate": 3.069053708439898e-05,
"loss": 1.8153,
"step": 300
},
{
"epoch": 0.39,
"learning_rate": 3.120204603580563e-05,
"loss": 1.6485,
"step": 305
},
{
"epoch": 0.4,
"learning_rate": 3.171355498721228e-05,
"loss": 1.485,
"step": 310
},
{
"epoch": 0.4,
"learning_rate": 3.222506393861893e-05,
"loss": 1.7435,
"step": 315
},
{
"epoch": 0.41,
"learning_rate": 3.273657289002558e-05,
"loss": 1.6707,
"step": 320
},
{
"epoch": 0.42,
"learning_rate": 3.324808184143223e-05,
"loss": 1.5172,
"step": 325
},
{
"epoch": 0.42,
"learning_rate": 3.375959079283888e-05,
"loss": 1.538,
"step": 330
},
{
"epoch": 0.43,
"learning_rate": 3.427109974424553e-05,
"loss": 1.424,
"step": 335
},
{
"epoch": 0.44,
"learning_rate": 3.478260869565218e-05,
"loss": 1.3758,
"step": 340
},
{
"epoch": 0.44,
"learning_rate": 3.529411764705883e-05,
"loss": 1.3251,
"step": 345
},
{
"epoch": 0.45,
"learning_rate": 3.580562659846548e-05,
"loss": 1.3147,
"step": 350
},
{
"epoch": 0.45,
"learning_rate": 3.6317135549872126e-05,
"loss": 1.3606,
"step": 355
},
{
"epoch": 0.46,
"learning_rate": 3.6828644501278776e-05,
"loss": 1.3198,
"step": 360
},
{
"epoch": 0.47,
"learning_rate": 3.7340153452685426e-05,
"loss": 1.4826,
"step": 365
},
{
"epoch": 0.47,
"learning_rate": 3.7851662404092075e-05,
"loss": 1.1348,
"step": 370
},
{
"epoch": 0.48,
"learning_rate": 3.8363171355498725e-05,
"loss": 1.2849,
"step": 375
},
{
"epoch": 0.49,
"learning_rate": 3.8874680306905374e-05,
"loss": 1.2261,
"step": 380
},
{
"epoch": 0.49,
"learning_rate": 3.9386189258312024e-05,
"loss": 1.175,
"step": 385
},
{
"epoch": 0.5,
"learning_rate": 3.989769820971867e-05,
"loss": 1.1654,
"step": 390
},
{
"epoch": 0.51,
"learning_rate": 3.9954467842914065e-05,
"loss": 1.2218,
"step": 395
},
{
"epoch": 0.51,
"learning_rate": 3.989755264655663e-05,
"loss": 1.0849,
"step": 400
},
{
"epoch": 0.52,
"learning_rate": 3.9840637450199205e-05,
"loss": 1.1411,
"step": 405
},
{
"epoch": 0.52,
"learning_rate": 3.978372225384178e-05,
"loss": 1.168,
"step": 410
},
{
"epoch": 0.53,
"learning_rate": 3.972680705748435e-05,
"loss": 1.2054,
"step": 415
},
{
"epoch": 0.54,
"learning_rate": 3.9669891861126925e-05,
"loss": 1.1557,
"step": 420
},
{
"epoch": 0.54,
"learning_rate": 3.96129766647695e-05,
"loss": 1.119,
"step": 425
},
{
"epoch": 0.55,
"learning_rate": 3.955606146841207e-05,
"loss": 1.1867,
"step": 430
},
{
"epoch": 0.56,
"learning_rate": 3.949914627205464e-05,
"loss": 0.9051,
"step": 435
},
{
"epoch": 0.56,
"learning_rate": 3.944223107569721e-05,
"loss": 1.0337,
"step": 440
},
{
"epoch": 0.57,
"learning_rate": 3.9385315879339785e-05,
"loss": 0.9839,
"step": 445
},
{
"epoch": 0.58,
"learning_rate": 3.932840068298236e-05,
"loss": 0.866,
"step": 450
},
{
"epoch": 0.58,
"learning_rate": 3.927148548662493e-05,
"loss": 0.9547,
"step": 455
},
{
"epoch": 0.59,
"learning_rate": 3.9214570290267505e-05,
"loss": 0.9838,
"step": 460
},
{
"epoch": 0.6,
"learning_rate": 3.915765509391008e-05,
"loss": 1.1078,
"step": 465
},
{
"epoch": 0.6,
"learning_rate": 3.910073989755265e-05,
"loss": 0.932,
"step": 470
},
{
"epoch": 0.61,
"learning_rate": 3.9043824701195225e-05,
"loss": 0.9879,
"step": 475
},
{
"epoch": 0.61,
"learning_rate": 3.898690950483779e-05,
"loss": 1.1054,
"step": 480
},
{
"epoch": 0.62,
"learning_rate": 3.8929994308480365e-05,
"loss": 0.9784,
"step": 485
},
{
"epoch": 0.63,
"learning_rate": 3.887307911212294e-05,
"loss": 1.0294,
"step": 490
},
{
"epoch": 0.63,
"learning_rate": 3.881616391576551e-05,
"loss": 0.946,
"step": 495
},
{
"epoch": 0.64,
"learning_rate": 3.8759248719408085e-05,
"loss": 0.9403,
"step": 500
},
{
"epoch": 0.65,
"learning_rate": 3.870233352305066e-05,
"loss": 0.8587,
"step": 505
},
{
"epoch": 0.65,
"learning_rate": 3.864541832669323e-05,
"loss": 0.881,
"step": 510
},
{
"epoch": 0.66,
"learning_rate": 3.85885031303358e-05,
"loss": 0.904,
"step": 515
},
{
"epoch": 0.67,
"learning_rate": 3.853158793397838e-05,
"loss": 0.9147,
"step": 520
},
{
"epoch": 0.67,
"learning_rate": 3.8474672737620945e-05,
"loss": 0.9299,
"step": 525
},
{
"epoch": 0.68,
"learning_rate": 3.841775754126352e-05,
"loss": 0.9064,
"step": 530
},
{
"epoch": 0.68,
"learning_rate": 3.836084234490609e-05,
"loss": 1.0158,
"step": 535
},
{
"epoch": 0.69,
"learning_rate": 3.8303927148548666e-05,
"loss": 0.8712,
"step": 540
},
{
"epoch": 0.7,
"learning_rate": 3.824701195219124e-05,
"loss": 0.8876,
"step": 545
},
{
"epoch": 0.7,
"learning_rate": 3.819009675583381e-05,
"loss": 0.9165,
"step": 550
},
{
"epoch": 0.71,
"learning_rate": 3.8133181559476386e-05,
"loss": 0.7727,
"step": 555
},
{
"epoch": 0.72,
"learning_rate": 3.807626636311895e-05,
"loss": 0.9544,
"step": 560
},
{
"epoch": 0.72,
"learning_rate": 3.8019351166761526e-05,
"loss": 0.8282,
"step": 565
},
{
"epoch": 0.73,
"learning_rate": 3.79624359704041e-05,
"loss": 0.8105,
"step": 570
},
{
"epoch": 0.74,
"learning_rate": 3.790552077404667e-05,
"loss": 0.8246,
"step": 575
},
{
"epoch": 0.74,
"learning_rate": 3.7848605577689246e-05,
"loss": 0.8071,
"step": 580
},
{
"epoch": 0.75,
"learning_rate": 3.779169038133182e-05,
"loss": 0.964,
"step": 585
},
{
"epoch": 0.76,
"learning_rate": 3.773477518497439e-05,
"loss": 0.8634,
"step": 590
},
{
"epoch": 0.76,
"learning_rate": 3.7677859988616966e-05,
"loss": 0.7772,
"step": 595
},
{
"epoch": 0.77,
"learning_rate": 3.762094479225954e-05,
"loss": 0.8086,
"step": 600
},
{
"epoch": 0.77,
"learning_rate": 3.7564029595902106e-05,
"loss": 0.8886,
"step": 605
},
{
"epoch": 0.78,
"learning_rate": 3.750711439954468e-05,
"loss": 0.8618,
"step": 610
},
{
"epoch": 0.79,
"learning_rate": 3.745019920318725e-05,
"loss": 0.8221,
"step": 615
},
{
"epoch": 0.79,
"learning_rate": 3.7393284006829826e-05,
"loss": 0.9336,
"step": 620
},
{
"epoch": 0.8,
"learning_rate": 3.73363688104724e-05,
"loss": 0.7384,
"step": 625
},
{
"epoch": 0.81,
"learning_rate": 3.727945361411497e-05,
"loss": 0.7313,
"step": 630
},
{
"epoch": 0.81,
"learning_rate": 3.7222538417757546e-05,
"loss": 0.9178,
"step": 635
},
{
"epoch": 0.82,
"learning_rate": 3.716562322140011e-05,
"loss": 0.8866,
"step": 640
},
{
"epoch": 0.83,
"learning_rate": 3.710870802504269e-05,
"loss": 0.6832,
"step": 645
},
{
"epoch": 0.83,
"learning_rate": 3.705179282868526e-05,
"loss": 0.776,
"step": 650
},
{
"epoch": 0.84,
"learning_rate": 3.699487763232783e-05,
"loss": 0.7298,
"step": 655
},
{
"epoch": 0.84,
"learning_rate": 3.6937962435970406e-05,
"loss": 0.7268,
"step": 660
},
{
"epoch": 0.85,
"learning_rate": 3.688104723961298e-05,
"loss": 0.8073,
"step": 665
},
{
"epoch": 0.86,
"learning_rate": 3.682413204325555e-05,
"loss": 0.7678,
"step": 670
},
{
"epoch": 0.86,
"learning_rate": 3.6767216846898126e-05,
"loss": 0.8216,
"step": 675
},
{
"epoch": 0.87,
"learning_rate": 3.67103016505407e-05,
"loss": 0.6896,
"step": 680
},
{
"epoch": 0.88,
"learning_rate": 3.6653386454183266e-05,
"loss": 0.8691,
"step": 685
},
{
"epoch": 0.88,
"learning_rate": 3.659647125782584e-05,
"loss": 0.8097,
"step": 690
},
{
"epoch": 0.89,
"learning_rate": 3.653955606146841e-05,
"loss": 0.7124,
"step": 695
},
{
"epoch": 0.9,
"learning_rate": 3.6482640865110987e-05,
"loss": 0.8661,
"step": 700
},
{
"epoch": 0.9,
"learning_rate": 3.642572566875356e-05,
"loss": 1.0455,
"step": 705
},
{
"epoch": 0.91,
"learning_rate": 3.636881047239613e-05,
"loss": 0.8263,
"step": 710
},
{
"epoch": 0.92,
"learning_rate": 3.631189527603871e-05,
"loss": 0.6256,
"step": 715
},
{
"epoch": 0.92,
"learning_rate": 3.625498007968128e-05,
"loss": 0.788,
"step": 720
},
{
"epoch": 0.93,
"learning_rate": 3.6198064883323853e-05,
"loss": 0.7374,
"step": 725
},
{
"epoch": 0.93,
"learning_rate": 3.614114968696642e-05,
"loss": 0.6936,
"step": 730
},
{
"epoch": 0.94,
"learning_rate": 3.6084234490608993e-05,
"loss": 0.7579,
"step": 735
},
{
"epoch": 0.95,
"learning_rate": 3.602731929425157e-05,
"loss": 0.7191,
"step": 740
},
{
"epoch": 0.95,
"learning_rate": 3.597040409789414e-05,
"loss": 0.7349,
"step": 745
},
{
"epoch": 0.96,
"learning_rate": 3.5913488901536714e-05,
"loss": 0.6269,
"step": 750
},
{
"epoch": 0.97,
"learning_rate": 3.585657370517929e-05,
"loss": 0.664,
"step": 755
},
{
"epoch": 0.97,
"learning_rate": 3.579965850882186e-05,
"loss": 0.6365,
"step": 760
},
{
"epoch": 0.98,
"learning_rate": 3.574274331246443e-05,
"loss": 0.756,
"step": 765
},
{
"epoch": 0.99,
"learning_rate": 3.568582811610701e-05,
"loss": 0.7975,
"step": 770
},
{
"epoch": 0.99,
"learning_rate": 3.5628912919749574e-05,
"loss": 0.8584,
"step": 775
},
{
"epoch": 1.0,
"learning_rate": 3.557199772339215e-05,
"loss": 0.6439,
"step": 780
},
{
"epoch": 1.0,
"eval_accuracy": 0.8138,
"eval_loss": 0.6126329302787781,
"eval_runtime": 60.9802,
"eval_samples_per_second": 163.988,
"eval_steps_per_second": 10.249,
"step": 781
},
{
"epoch": 1.01,
"learning_rate": 3.551508252703472e-05,
"loss": 0.6383,
"step": 785
},
{
"epoch": 1.01,
"learning_rate": 3.5458167330677294e-05,
"loss": 0.6756,
"step": 790
},
{
"epoch": 1.02,
"learning_rate": 3.540125213431987e-05,
"loss": 0.5847,
"step": 795
},
{
"epoch": 1.02,
"learning_rate": 3.534433693796244e-05,
"loss": 0.5047,
"step": 800
},
{
"epoch": 1.03,
"learning_rate": 3.5287421741605014e-05,
"loss": 0.5946,
"step": 805
},
{
"epoch": 1.04,
"learning_rate": 3.523050654524758e-05,
"loss": 0.51,
"step": 810
},
{
"epoch": 1.04,
"learning_rate": 3.5173591348890154e-05,
"loss": 0.5915,
"step": 815
},
{
"epoch": 1.05,
"learning_rate": 3.511667615253273e-05,
"loss": 0.6674,
"step": 820
},
{
"epoch": 1.06,
"learning_rate": 3.50597609561753e-05,
"loss": 0.6427,
"step": 825
},
{
"epoch": 1.06,
"learning_rate": 3.5002845759817874e-05,
"loss": 0.6404,
"step": 830
},
{
"epoch": 1.07,
"learning_rate": 3.494593056346045e-05,
"loss": 0.5568,
"step": 835
},
{
"epoch": 1.08,
"learning_rate": 3.488901536710302e-05,
"loss": 0.7048,
"step": 840
},
{
"epoch": 1.08,
"learning_rate": 3.4832100170745594e-05,
"loss": 0.5817,
"step": 845
},
{
"epoch": 1.09,
"learning_rate": 3.477518497438817e-05,
"loss": 0.5192,
"step": 850
},
{
"epoch": 1.09,
"learning_rate": 3.4718269778030734e-05,
"loss": 0.7096,
"step": 855
},
{
"epoch": 1.1,
"learning_rate": 3.466135458167331e-05,
"loss": 0.561,
"step": 860
},
{
"epoch": 1.11,
"learning_rate": 3.460443938531588e-05,
"loss": 0.6275,
"step": 865
},
{
"epoch": 1.11,
"learning_rate": 3.4547524188958454e-05,
"loss": 0.5082,
"step": 870
},
{
"epoch": 1.12,
"learning_rate": 3.449060899260103e-05,
"loss": 0.616,
"step": 875
},
{
"epoch": 1.13,
"learning_rate": 3.44336937962436e-05,
"loss": 0.5976,
"step": 880
},
{
"epoch": 1.13,
"learning_rate": 3.4376778599886174e-05,
"loss": 0.6847,
"step": 885
},
{
"epoch": 1.14,
"learning_rate": 3.431986340352874e-05,
"loss": 0.4798,
"step": 890
},
{
"epoch": 1.15,
"learning_rate": 3.426294820717132e-05,
"loss": 0.6393,
"step": 895
},
{
"epoch": 1.15,
"learning_rate": 3.420603301081389e-05,
"loss": 0.4907,
"step": 900
},
{
"epoch": 1.16,
"learning_rate": 3.414911781445646e-05,
"loss": 0.4741,
"step": 905
},
{
"epoch": 1.17,
"learning_rate": 3.4092202618099035e-05,
"loss": 0.4989,
"step": 910
},
{
"epoch": 1.17,
"learning_rate": 3.403528742174161e-05,
"loss": 0.6102,
"step": 915
},
{
"epoch": 1.18,
"learning_rate": 3.397837222538418e-05,
"loss": 0.6051,
"step": 920
},
{
"epoch": 1.18,
"learning_rate": 3.3921457029026755e-05,
"loss": 0.5615,
"step": 925
},
{
"epoch": 1.19,
"learning_rate": 3.386454183266933e-05,
"loss": 0.5091,
"step": 930
},
{
"epoch": 1.2,
"learning_rate": 3.3807626636311895e-05,
"loss": 0.5863,
"step": 935
},
{
"epoch": 1.2,
"learning_rate": 3.375071143995447e-05,
"loss": 0.6056,
"step": 940
},
{
"epoch": 1.21,
"learning_rate": 3.369379624359704e-05,
"loss": 0.4893,
"step": 945
},
{
"epoch": 1.22,
"learning_rate": 3.3636881047239615e-05,
"loss": 0.607,
"step": 950
},
{
"epoch": 1.22,
"learning_rate": 3.357996585088219e-05,
"loss": 0.5942,
"step": 955
},
{
"epoch": 1.23,
"learning_rate": 3.352305065452476e-05,
"loss": 0.5453,
"step": 960
},
{
"epoch": 1.24,
"learning_rate": 3.3466135458167335e-05,
"loss": 0.5637,
"step": 965
},
{
"epoch": 1.24,
"learning_rate": 3.34092202618099e-05,
"loss": 0.5974,
"step": 970
},
{
"epoch": 1.25,
"learning_rate": 3.335230506545248e-05,
"loss": 0.5365,
"step": 975
},
{
"epoch": 1.25,
"learning_rate": 3.329538986909505e-05,
"loss": 0.5487,
"step": 980
},
{
"epoch": 1.26,
"learning_rate": 3.323847467273762e-05,
"loss": 0.5981,
"step": 985
},
{
"epoch": 1.27,
"learning_rate": 3.3181559476380195e-05,
"loss": 0.4977,
"step": 990
},
{
"epoch": 1.27,
"learning_rate": 3.312464428002277e-05,
"loss": 0.4873,
"step": 995
},
{
"epoch": 1.28,
"learning_rate": 3.306772908366534e-05,
"loss": 0.6305,
"step": 1000
},
{
"epoch": 1.29,
"learning_rate": 3.3010813887307915e-05,
"loss": 0.4625,
"step": 1005
},
{
"epoch": 1.29,
"learning_rate": 3.295389869095049e-05,
"loss": 0.7791,
"step": 1010
},
{
"epoch": 1.3,
"learning_rate": 3.2896983494593055e-05,
"loss": 0.5784,
"step": 1015
},
{
"epoch": 1.31,
"learning_rate": 3.2840068298235635e-05,
"loss": 0.4482,
"step": 1020
},
{
"epoch": 1.31,
"learning_rate": 3.27831531018782e-05,
"loss": 0.5718,
"step": 1025
},
{
"epoch": 1.32,
"learning_rate": 3.2726237905520775e-05,
"loss": 0.5399,
"step": 1030
},
{
"epoch": 1.33,
"learning_rate": 3.266932270916335e-05,
"loss": 0.5408,
"step": 1035
},
{
"epoch": 1.33,
"learning_rate": 3.261240751280592e-05,
"loss": 0.5713,
"step": 1040
},
{
"epoch": 1.34,
"learning_rate": 3.2555492316448495e-05,
"loss": 0.3968,
"step": 1045
},
{
"epoch": 1.34,
"learning_rate": 3.249857712009107e-05,
"loss": 0.5708,
"step": 1050
},
{
"epoch": 1.35,
"learning_rate": 3.244166192373364e-05,
"loss": 0.6139,
"step": 1055
},
{
"epoch": 1.36,
"learning_rate": 3.238474672737621e-05,
"loss": 0.6031,
"step": 1060
},
{
"epoch": 1.36,
"learning_rate": 3.232783153101878e-05,
"loss": 0.4819,
"step": 1065
},
{
"epoch": 1.37,
"learning_rate": 3.2270916334661356e-05,
"loss": 0.5141,
"step": 1070
},
{
"epoch": 1.38,
"learning_rate": 3.221400113830393e-05,
"loss": 0.4998,
"step": 1075
},
{
"epoch": 1.38,
"learning_rate": 3.21570859419465e-05,
"loss": 0.4646,
"step": 1080
},
{
"epoch": 1.39,
"learning_rate": 3.2100170745589076e-05,
"loss": 0.4859,
"step": 1085
},
{
"epoch": 1.4,
"learning_rate": 3.204325554923165e-05,
"loss": 0.5069,
"step": 1090
},
{
"epoch": 1.4,
"learning_rate": 3.1986340352874216e-05,
"loss": 0.5751,
"step": 1095
},
{
"epoch": 1.41,
"learning_rate": 3.1929425156516796e-05,
"loss": 0.4505,
"step": 1100
},
{
"epoch": 1.41,
"learning_rate": 3.187250996015936e-05,
"loss": 0.5396,
"step": 1105
},
{
"epoch": 1.42,
"learning_rate": 3.1815594763801936e-05,
"loss": 0.5394,
"step": 1110
},
{
"epoch": 1.43,
"learning_rate": 3.175867956744451e-05,
"loss": 0.6824,
"step": 1115
},
{
"epoch": 1.43,
"learning_rate": 3.170176437108708e-05,
"loss": 0.414,
"step": 1120
},
{
"epoch": 1.44,
"learning_rate": 3.1644849174729656e-05,
"loss": 0.5944,
"step": 1125
},
{
"epoch": 1.45,
"learning_rate": 3.158793397837223e-05,
"loss": 0.5384,
"step": 1130
},
{
"epoch": 1.45,
"learning_rate": 3.15310187820148e-05,
"loss": 0.7521,
"step": 1135
},
{
"epoch": 1.46,
"learning_rate": 3.147410358565737e-05,
"loss": 0.6244,
"step": 1140
},
{
"epoch": 1.47,
"learning_rate": 3.141718838929995e-05,
"loss": 0.4822,
"step": 1145
},
{
"epoch": 1.47,
"learning_rate": 3.1360273192942516e-05,
"loss": 0.5942,
"step": 1150
},
{
"epoch": 1.48,
"learning_rate": 3.130335799658509e-05,
"loss": 0.5526,
"step": 1155
},
{
"epoch": 1.49,
"learning_rate": 3.124644280022766e-05,
"loss": 0.5807,
"step": 1160
},
{
"epoch": 1.49,
"learning_rate": 3.1189527603870236e-05,
"loss": 0.6191,
"step": 1165
},
{
"epoch": 1.5,
"learning_rate": 3.113261240751281e-05,
"loss": 0.4252,
"step": 1170
},
{
"epoch": 1.5,
"learning_rate": 3.107569721115538e-05,
"loss": 0.6039,
"step": 1175
},
{
"epoch": 1.51,
"learning_rate": 3.1018782014797956e-05,
"loss": 0.5023,
"step": 1180
},
{
"epoch": 1.52,
"learning_rate": 3.096186681844052e-05,
"loss": 0.4397,
"step": 1185
},
{
"epoch": 1.52,
"learning_rate": 3.0904951622083096e-05,
"loss": 0.5488,
"step": 1190
},
{
"epoch": 1.53,
"learning_rate": 3.084803642572567e-05,
"loss": 0.4943,
"step": 1195
},
{
"epoch": 1.54,
"learning_rate": 3.079112122936824e-05,
"loss": 0.4196,
"step": 1200
},
{
"epoch": 1.54,
"learning_rate": 3.0734206033010816e-05,
"loss": 0.5103,
"step": 1205
},
{
"epoch": 1.55,
"learning_rate": 3.067729083665339e-05,
"loss": 0.5383,
"step": 1210
},
{
"epoch": 1.56,
"learning_rate": 3.062037564029596e-05,
"loss": 0.5533,
"step": 1215
},
{
"epoch": 1.56,
"learning_rate": 3.056346044393853e-05,
"loss": 0.6003,
"step": 1220
},
{
"epoch": 1.57,
"learning_rate": 3.0506545247581107e-05,
"loss": 0.3887,
"step": 1225
},
{
"epoch": 1.57,
"learning_rate": 3.0449630051223676e-05,
"loss": 0.4925,
"step": 1230
},
{
"epoch": 1.58,
"learning_rate": 3.0392714854866253e-05,
"loss": 0.5327,
"step": 1235
},
{
"epoch": 1.59,
"learning_rate": 3.0335799658508823e-05,
"loss": 0.4195,
"step": 1240
},
{
"epoch": 1.59,
"learning_rate": 3.0278884462151397e-05,
"loss": 0.4912,
"step": 1245
},
{
"epoch": 1.6,
"learning_rate": 3.022196926579397e-05,
"loss": 0.6002,
"step": 1250
},
{
"epoch": 1.61,
"learning_rate": 3.016505406943654e-05,
"loss": 0.5191,
"step": 1255
},
{
"epoch": 1.61,
"learning_rate": 3.0108138873079117e-05,
"loss": 0.4732,
"step": 1260
},
{
"epoch": 1.62,
"learning_rate": 3.0051223676721687e-05,
"loss": 0.4728,
"step": 1265
},
{
"epoch": 1.63,
"learning_rate": 2.999430848036426e-05,
"loss": 0.658,
"step": 1270
},
{
"epoch": 1.63,
"learning_rate": 2.993739328400683e-05,
"loss": 0.3973,
"step": 1275
},
{
"epoch": 1.64,
"learning_rate": 2.9880478087649403e-05,
"loss": 0.518,
"step": 1280
},
{
"epoch": 1.65,
"learning_rate": 2.982356289129198e-05,
"loss": 0.513,
"step": 1285
},
{
"epoch": 1.65,
"learning_rate": 2.976664769493455e-05,
"loss": 0.4699,
"step": 1290
},
{
"epoch": 1.66,
"learning_rate": 2.9709732498577124e-05,
"loss": 0.5086,
"step": 1295
},
{
"epoch": 1.66,
"learning_rate": 2.9652817302219694e-05,
"loss": 0.4464,
"step": 1300
},
{
"epoch": 1.67,
"learning_rate": 2.9595902105862267e-05,
"loss": 0.4587,
"step": 1305
},
{
"epoch": 1.68,
"learning_rate": 2.953898690950484e-05,
"loss": 0.5568,
"step": 1310
},
{
"epoch": 1.68,
"learning_rate": 2.9482071713147414e-05,
"loss": 0.4991,
"step": 1315
},
{
"epoch": 1.69,
"learning_rate": 2.9425156516789984e-05,
"loss": 0.4953,
"step": 1320
},
{
"epoch": 1.7,
"learning_rate": 2.9368241320432557e-05,
"loss": 0.5821,
"step": 1325
},
{
"epoch": 1.7,
"learning_rate": 2.9311326124075134e-05,
"loss": 0.4582,
"step": 1330
},
{
"epoch": 1.71,
"learning_rate": 2.9254410927717704e-05,
"loss": 0.4931,
"step": 1335
},
{
"epoch": 1.72,
"learning_rate": 2.9197495731360277e-05,
"loss": 0.4979,
"step": 1340
},
{
"epoch": 1.72,
"learning_rate": 2.9140580535002847e-05,
"loss": 0.4933,
"step": 1345
},
{
"epoch": 1.73,
"learning_rate": 2.908366533864542e-05,
"loss": 0.463,
"step": 1350
},
{
"epoch": 1.73,
"learning_rate": 2.902675014228799e-05,
"loss": 0.4945,
"step": 1355
},
{
"epoch": 1.74,
"learning_rate": 2.8969834945930567e-05,
"loss": 0.4822,
"step": 1360
},
{
"epoch": 1.75,
"learning_rate": 2.8912919749573137e-05,
"loss": 0.5452,
"step": 1365
},
{
"epoch": 1.75,
"learning_rate": 2.885600455321571e-05,
"loss": 0.4868,
"step": 1370
},
{
"epoch": 1.76,
"learning_rate": 2.8799089356858284e-05,
"loss": 0.553,
"step": 1375
},
{
"epoch": 1.77,
"learning_rate": 2.8742174160500854e-05,
"loss": 0.5744,
"step": 1380
},
{
"epoch": 1.77,
"learning_rate": 2.868525896414343e-05,
"loss": 0.5091,
"step": 1385
},
{
"epoch": 1.78,
"learning_rate": 2.8628343767786e-05,
"loss": 0.5209,
"step": 1390
},
{
"epoch": 1.79,
"learning_rate": 2.8571428571428574e-05,
"loss": 0.5506,
"step": 1395
},
{
"epoch": 1.79,
"learning_rate": 2.8514513375071144e-05,
"loss": 0.5383,
"step": 1400
},
{
"epoch": 1.8,
"learning_rate": 2.8457598178713718e-05,
"loss": 0.5534,
"step": 1405
},
{
"epoch": 1.81,
"learning_rate": 2.8400682982356294e-05,
"loss": 0.3911,
"step": 1410
},
{
"epoch": 1.81,
"learning_rate": 2.8343767785998864e-05,
"loss": 0.501,
"step": 1415
},
{
"epoch": 1.82,
"learning_rate": 2.8286852589641438e-05,
"loss": 0.4988,
"step": 1420
},
{
"epoch": 1.82,
"learning_rate": 2.8229937393284008e-05,
"loss": 0.5158,
"step": 1425
},
{
"epoch": 1.83,
"learning_rate": 2.817302219692658e-05,
"loss": 0.4976,
"step": 1430
},
{
"epoch": 1.84,
"learning_rate": 2.811610700056915e-05,
"loss": 0.4873,
"step": 1435
},
{
"epoch": 1.84,
"learning_rate": 2.8059191804211728e-05,
"loss": 0.5198,
"step": 1440
},
{
"epoch": 1.85,
"learning_rate": 2.8002276607854298e-05,
"loss": 0.4795,
"step": 1445
},
{
"epoch": 1.86,
"learning_rate": 2.794536141149687e-05,
"loss": 0.5029,
"step": 1450
},
{
"epoch": 1.86,
"learning_rate": 2.7888446215139448e-05,
"loss": 0.4574,
"step": 1455
},
{
"epoch": 1.87,
"learning_rate": 2.7831531018782018e-05,
"loss": 0.4224,
"step": 1460
},
{
"epoch": 1.88,
"learning_rate": 2.777461582242459e-05,
"loss": 0.4447,
"step": 1465
},
{
"epoch": 1.88,
"learning_rate": 2.771770062606716e-05,
"loss": 0.5863,
"step": 1470
},
{
"epoch": 1.89,
"learning_rate": 2.7660785429709735e-05,
"loss": 0.5724,
"step": 1475
},
{
"epoch": 1.89,
"learning_rate": 2.7603870233352305e-05,
"loss": 0.4397,
"step": 1480
},
{
"epoch": 1.9,
"learning_rate": 2.754695503699488e-05,
"loss": 0.441,
"step": 1485
},
{
"epoch": 1.91,
"learning_rate": 2.749003984063745e-05,
"loss": 0.549,
"step": 1490
},
{
"epoch": 1.91,
"learning_rate": 2.7433124644280025e-05,
"loss": 0.4723,
"step": 1495
},
{
"epoch": 1.92,
"learning_rate": 2.7376209447922598e-05,
"loss": 0.4554,
"step": 1500
},
{
"epoch": 1.93,
"learning_rate": 2.7319294251565168e-05,
"loss": 0.5067,
"step": 1505
},
{
"epoch": 1.93,
"learning_rate": 2.7262379055207745e-05,
"loss": 0.3471,
"step": 1510
},
{
"epoch": 1.94,
"learning_rate": 2.7205463858850315e-05,
"loss": 0.4403,
"step": 1515
},
{
"epoch": 1.95,
"learning_rate": 2.714854866249289e-05,
"loss": 0.4034,
"step": 1520
},
{
"epoch": 1.95,
"learning_rate": 2.709163346613546e-05,
"loss": 0.617,
"step": 1525
},
{
"epoch": 1.96,
"learning_rate": 2.7034718269778032e-05,
"loss": 0.489,
"step": 1530
},
{
"epoch": 1.97,
"learning_rate": 2.697780307342061e-05,
"loss": 0.4514,
"step": 1535
},
{
"epoch": 1.97,
"learning_rate": 2.692088787706318e-05,
"loss": 0.4604,
"step": 1540
},
{
"epoch": 1.98,
"learning_rate": 2.6863972680705752e-05,
"loss": 0.4845,
"step": 1545
},
{
"epoch": 1.98,
"learning_rate": 2.6807057484348322e-05,
"loss": 0.4273,
"step": 1550
},
{
"epoch": 1.99,
"learning_rate": 2.6750142287990895e-05,
"loss": 0.3995,
"step": 1555
},
{
"epoch": 2.0,
"learning_rate": 2.6693227091633465e-05,
"loss": 0.6222,
"step": 1560
},
{
"epoch": 2.0,
"eval_accuracy": 0.8393,
"eval_loss": 0.5094287395477295,
"eval_runtime": 60.7156,
"eval_samples_per_second": 164.702,
"eval_steps_per_second": 10.294,
"step": 1562
},
{
"epoch": 2.0,
"learning_rate": 2.6636311895276042e-05,
"loss": 0.3977,
"step": 1565
},
{
"epoch": 2.01,
"learning_rate": 2.6579396698918612e-05,
"loss": 0.2847,
"step": 1570
},
{
"epoch": 2.02,
"learning_rate": 2.6522481502561185e-05,
"loss": 0.384,
"step": 1575
},
{
"epoch": 2.02,
"learning_rate": 2.6465566306203762e-05,
"loss": 0.3344,
"step": 1580
},
{
"epoch": 2.03,
"learning_rate": 2.6408651109846332e-05,
"loss": 0.347,
"step": 1585
},
{
"epoch": 2.04,
"learning_rate": 2.6351735913488905e-05,
"loss": 0.3207,
"step": 1590
},
{
"epoch": 2.04,
"learning_rate": 2.6294820717131475e-05,
"loss": 0.3625,
"step": 1595
},
{
"epoch": 2.05,
"learning_rate": 2.623790552077405e-05,
"loss": 0.2822,
"step": 1600
},
{
"epoch": 2.06,
"learning_rate": 2.618099032441662e-05,
"loss": 0.3479,
"step": 1605
},
{
"epoch": 2.06,
"learning_rate": 2.6124075128059196e-05,
"loss": 0.318,
"step": 1610
},
{
"epoch": 2.07,
"learning_rate": 2.6067159931701766e-05,
"loss": 0.3668,
"step": 1615
},
{
"epoch": 2.07,
"learning_rate": 2.601024473534434e-05,
"loss": 0.3594,
"step": 1620
},
{
"epoch": 2.08,
"learning_rate": 2.5953329538986912e-05,
"loss": 0.3636,
"step": 1625
},
{
"epoch": 2.09,
"learning_rate": 2.5896414342629482e-05,
"loss": 0.3588,
"step": 1630
},
{
"epoch": 2.09,
"learning_rate": 2.583949914627206e-05,
"loss": 0.3155,
"step": 1635
},
{
"epoch": 2.1,
"learning_rate": 2.578258394991463e-05,
"loss": 0.3362,
"step": 1640
},
{
"epoch": 2.11,
"learning_rate": 2.5725668753557202e-05,
"loss": 0.3159,
"step": 1645
},
{
"epoch": 2.11,
"learning_rate": 2.5668753557199772e-05,
"loss": 0.3167,
"step": 1650
},
{
"epoch": 2.12,
"learning_rate": 2.5611838360842346e-05,
"loss": 0.3597,
"step": 1655
},
{
"epoch": 2.13,
"learning_rate": 2.5554923164484923e-05,
"loss": 0.2862,
"step": 1660
},
{
"epoch": 2.13,
"learning_rate": 2.5498007968127493e-05,
"loss": 0.4218,
"step": 1665
},
{
"epoch": 2.14,
"learning_rate": 2.5441092771770066e-05,
"loss": 0.3902,
"step": 1670
},
{
"epoch": 2.14,
"learning_rate": 2.5384177575412636e-05,
"loss": 0.371,
"step": 1675
},
{
"epoch": 2.15,
"learning_rate": 2.532726237905521e-05,
"loss": 0.3218,
"step": 1680
},
{
"epoch": 2.16,
"learning_rate": 2.527034718269778e-05,
"loss": 0.3233,
"step": 1685
},
{
"epoch": 2.16,
"learning_rate": 2.5213431986340356e-05,
"loss": 0.3293,
"step": 1690
},
{
"epoch": 2.17,
"learning_rate": 2.5156516789982926e-05,
"loss": 0.295,
"step": 1695
},
{
"epoch": 2.18,
"learning_rate": 2.50996015936255e-05,
"loss": 0.3192,
"step": 1700
},
{
"epoch": 2.18,
"learning_rate": 2.5042686397268073e-05,
"loss": 0.2638,
"step": 1705
},
{
"epoch": 2.19,
"learning_rate": 2.4985771200910646e-05,
"loss": 0.3065,
"step": 1710
},
{
"epoch": 2.2,
"learning_rate": 2.492885600455322e-05,
"loss": 0.3483,
"step": 1715
},
{
"epoch": 2.2,
"learning_rate": 2.487194080819579e-05,
"loss": 0.3138,
"step": 1720
},
{
"epoch": 2.21,
"learning_rate": 2.4815025611838363e-05,
"loss": 0.3677,
"step": 1725
},
{
"epoch": 2.22,
"learning_rate": 2.4758110415480933e-05,
"loss": 0.3726,
"step": 1730
},
{
"epoch": 2.22,
"learning_rate": 2.470119521912351e-05,
"loss": 0.3356,
"step": 1735
},
{
"epoch": 2.23,
"learning_rate": 2.4644280022766083e-05,
"loss": 0.3099,
"step": 1740
},
{
"epoch": 2.23,
"learning_rate": 2.4587364826408653e-05,
"loss": 0.283,
"step": 1745
},
{
"epoch": 2.24,
"learning_rate": 2.4530449630051226e-05,
"loss": 0.2828,
"step": 1750
},
{
"epoch": 2.25,
"learning_rate": 2.4473534433693796e-05,
"loss": 0.3751,
"step": 1755
},
{
"epoch": 2.25,
"learning_rate": 2.4416619237336373e-05,
"loss": 0.3227,
"step": 1760
},
{
"epoch": 2.26,
"learning_rate": 2.4359704040978943e-05,
"loss": 0.3716,
"step": 1765
},
{
"epoch": 2.27,
"learning_rate": 2.4302788844621517e-05,
"loss": 0.3669,
"step": 1770
},
{
"epoch": 2.27,
"learning_rate": 2.4245873648264087e-05,
"loss": 0.3195,
"step": 1775
},
{
"epoch": 2.28,
"learning_rate": 2.418895845190666e-05,
"loss": 0.3147,
"step": 1780
},
{
"epoch": 2.29,
"learning_rate": 2.4132043255549237e-05,
"loss": 0.339,
"step": 1785
},
{
"epoch": 2.29,
"learning_rate": 2.4075128059191807e-05,
"loss": 0.3949,
"step": 1790
},
{
"epoch": 2.3,
"learning_rate": 2.401821286283438e-05,
"loss": 0.2976,
"step": 1795
},
{
"epoch": 2.3,
"learning_rate": 2.396129766647695e-05,
"loss": 0.4075,
"step": 1800
},
{
"epoch": 2.31,
"learning_rate": 2.3904382470119523e-05,
"loss": 0.3482,
"step": 1805
},
{
"epoch": 2.32,
"learning_rate": 2.3847467273762093e-05,
"loss": 0.4089,
"step": 1810
},
{
"epoch": 2.32,
"learning_rate": 2.379055207740467e-05,
"loss": 0.3574,
"step": 1815
},
{
"epoch": 2.33,
"learning_rate": 2.373363688104724e-05,
"loss": 0.3617,
"step": 1820
},
{
"epoch": 2.34,
"learning_rate": 2.3676721684689814e-05,
"loss": 0.3421,
"step": 1825
},
{
"epoch": 2.34,
"learning_rate": 2.3619806488332387e-05,
"loss": 0.3523,
"step": 1830
},
{
"epoch": 2.35,
"learning_rate": 2.3562891291974957e-05,
"loss": 0.3594,
"step": 1835
},
{
"epoch": 2.36,
"learning_rate": 2.3505976095617534e-05,
"loss": 0.3177,
"step": 1840
},
{
"epoch": 2.36,
"learning_rate": 2.3449060899260104e-05,
"loss": 0.3867,
"step": 1845
},
{
"epoch": 2.37,
"learning_rate": 2.3392145702902677e-05,
"loss": 0.3826,
"step": 1850
},
{
"epoch": 2.38,
"learning_rate": 2.3335230506545247e-05,
"loss": 0.2243,
"step": 1855
},
{
"epoch": 2.38,
"learning_rate": 2.3278315310187824e-05,
"loss": 0.3039,
"step": 1860
},
{
"epoch": 2.39,
"learning_rate": 2.3221400113830397e-05,
"loss": 0.3555,
"step": 1865
},
{
"epoch": 2.39,
"learning_rate": 2.3164484917472967e-05,
"loss": 0.3321,
"step": 1870
},
{
"epoch": 2.4,
"learning_rate": 2.310756972111554e-05,
"loss": 0.3334,
"step": 1875
},
{
"epoch": 2.41,
"learning_rate": 2.305065452475811e-05,
"loss": 0.3629,
"step": 1880
},
{
"epoch": 2.41,
"learning_rate": 2.2993739328400687e-05,
"loss": 0.2421,
"step": 1885
},
{
"epoch": 2.42,
"learning_rate": 2.2936824132043257e-05,
"loss": 0.3204,
"step": 1890
},
{
"epoch": 2.43,
"learning_rate": 2.287990893568583e-05,
"loss": 0.3631,
"step": 1895
},
{
"epoch": 2.43,
"learning_rate": 2.28229937393284e-05,
"loss": 0.3279,
"step": 1900
},
{
"epoch": 2.44,
"learning_rate": 2.2766078542970974e-05,
"loss": 0.3008,
"step": 1905
},
{
"epoch": 2.45,
"learning_rate": 2.270916334661355e-05,
"loss": 0.4036,
"step": 1910
},
{
"epoch": 2.45,
"learning_rate": 2.265224815025612e-05,
"loss": 0.3201,
"step": 1915
},
{
"epoch": 2.46,
"learning_rate": 2.2595332953898694e-05,
"loss": 0.3041,
"step": 1920
},
{
"epoch": 2.46,
"learning_rate": 2.2538417757541264e-05,
"loss": 0.3208,
"step": 1925
},
{
"epoch": 2.47,
"learning_rate": 2.2481502561183838e-05,
"loss": 0.2943,
"step": 1930
},
{
"epoch": 2.48,
"learning_rate": 2.2424587364826408e-05,
"loss": 0.2831,
"step": 1935
},
{
"epoch": 2.48,
"learning_rate": 2.2367672168468984e-05,
"loss": 0.3645,
"step": 1940
},
{
"epoch": 2.49,
"learning_rate": 2.2310756972111554e-05,
"loss": 0.3532,
"step": 1945
},
{
"epoch": 2.5,
"learning_rate": 2.2253841775754128e-05,
"loss": 0.3504,
"step": 1950
},
{
"epoch": 2.5,
"learning_rate": 2.21969265793967e-05,
"loss": 0.3465,
"step": 1955
},
{
"epoch": 2.51,
"learning_rate": 2.214001138303927e-05,
"loss": 0.358,
"step": 1960
},
{
"epoch": 2.52,
"learning_rate": 2.2083096186681848e-05,
"loss": 0.3855,
"step": 1965
},
{
"epoch": 2.52,
"learning_rate": 2.2026180990324418e-05,
"loss": 0.2887,
"step": 1970
},
{
"epoch": 2.53,
"learning_rate": 2.196926579396699e-05,
"loss": 0.275,
"step": 1975
},
{
"epoch": 2.54,
"learning_rate": 2.191235059760956e-05,
"loss": 0.2384,
"step": 1980
},
{
"epoch": 2.54,
"learning_rate": 2.1855435401252138e-05,
"loss": 0.2829,
"step": 1985
},
{
"epoch": 2.55,
"learning_rate": 2.179852020489471e-05,
"loss": 0.3765,
"step": 1990
},
{
"epoch": 2.55,
"learning_rate": 2.174160500853728e-05,
"loss": 0.3509,
"step": 1995
},
{
"epoch": 2.56,
"learning_rate": 2.1684689812179855e-05,
"loss": 0.3517,
"step": 2000
},
{
"epoch": 2.57,
"learning_rate": 2.1627774615822425e-05,
"loss": 0.3016,
"step": 2005
},
{
"epoch": 2.57,
"learning_rate": 2.1570859419465e-05,
"loss": 0.3421,
"step": 2010
},
{
"epoch": 2.58,
"learning_rate": 2.151394422310757e-05,
"loss": 0.3054,
"step": 2015
},
{
"epoch": 2.59,
"learning_rate": 2.1457029026750145e-05,
"loss": 0.3658,
"step": 2020
},
{
"epoch": 2.59,
"learning_rate": 2.1400113830392715e-05,
"loss": 0.2979,
"step": 2025
},
{
"epoch": 2.6,
"learning_rate": 2.1343198634035288e-05,
"loss": 0.413,
"step": 2030
},
{
"epoch": 2.61,
"learning_rate": 2.1286283437677865e-05,
"loss": 0.3388,
"step": 2035
},
{
"epoch": 2.61,
"learning_rate": 2.1229368241320435e-05,
"loss": 0.2758,
"step": 2040
},
{
"epoch": 2.62,
"learning_rate": 2.117245304496301e-05,
"loss": 0.2786,
"step": 2045
},
{
"epoch": 2.62,
"learning_rate": 2.111553784860558e-05,
"loss": 0.2577,
"step": 2050
},
{
"epoch": 2.63,
"learning_rate": 2.1058622652248152e-05,
"loss": 0.26,
"step": 2055
},
{
"epoch": 2.64,
"learning_rate": 2.1001707455890722e-05,
"loss": 0.2994,
"step": 2060
},
{
"epoch": 2.64,
"learning_rate": 2.09447922595333e-05,
"loss": 0.2211,
"step": 2065
},
{
"epoch": 2.65,
"learning_rate": 2.088787706317587e-05,
"loss": 0.3152,
"step": 2070
},
{
"epoch": 2.66,
"learning_rate": 2.0830961866818442e-05,
"loss": 0.253,
"step": 2075
},
{
"epoch": 2.66,
"learning_rate": 2.0774046670461015e-05,
"loss": 0.3429,
"step": 2080
},
{
"epoch": 2.67,
"learning_rate": 2.0717131474103585e-05,
"loss": 0.2717,
"step": 2085
},
{
"epoch": 2.68,
"learning_rate": 2.0660216277746162e-05,
"loss": 0.2923,
"step": 2090
},
{
"epoch": 2.68,
"learning_rate": 2.0603301081388732e-05,
"loss": 0.2446,
"step": 2095
},
{
"epoch": 2.69,
"learning_rate": 2.0546385885031305e-05,
"loss": 0.2661,
"step": 2100
},
{
"epoch": 2.7,
"learning_rate": 2.0489470688673875e-05,
"loss": 0.3075,
"step": 2105
},
{
"epoch": 2.7,
"learning_rate": 2.0432555492316452e-05,
"loss": 0.3915,
"step": 2110
},
{
"epoch": 2.71,
"learning_rate": 2.0375640295959025e-05,
"loss": 0.385,
"step": 2115
},
{
"epoch": 2.71,
"learning_rate": 2.0318725099601595e-05,
"loss": 0.3714,
"step": 2120
},
{
"epoch": 2.72,
"learning_rate": 2.026180990324417e-05,
"loss": 0.3581,
"step": 2125
},
{
"epoch": 2.73,
"learning_rate": 2.020489470688674e-05,
"loss": 0.2439,
"step": 2130
},
{
"epoch": 2.73,
"learning_rate": 2.0147979510529316e-05,
"loss": 0.3,
"step": 2135
},
{
"epoch": 2.74,
"learning_rate": 2.0091064314171886e-05,
"loss": 0.2996,
"step": 2140
},
{
"epoch": 2.75,
"learning_rate": 2.003414911781446e-05,
"loss": 0.305,
"step": 2145
},
{
"epoch": 2.75,
"learning_rate": 1.9977233921457032e-05,
"loss": 0.3291,
"step": 2150
},
{
"epoch": 2.76,
"learning_rate": 1.9920318725099602e-05,
"loss": 0.2964,
"step": 2155
},
{
"epoch": 2.77,
"learning_rate": 1.9863403528742176e-05,
"loss": 0.4112,
"step": 2160
},
{
"epoch": 2.77,
"learning_rate": 1.980648833238475e-05,
"loss": 0.3476,
"step": 2165
},
{
"epoch": 2.78,
"learning_rate": 1.974957313602732e-05,
"loss": 0.314,
"step": 2170
},
{
"epoch": 2.78,
"learning_rate": 1.9692657939669892e-05,
"loss": 0.2829,
"step": 2175
},
{
"epoch": 2.79,
"learning_rate": 1.9635742743312466e-05,
"loss": 0.3628,
"step": 2180
},
{
"epoch": 2.8,
"learning_rate": 1.957882754695504e-05,
"loss": 0.2601,
"step": 2185
},
{
"epoch": 2.8,
"learning_rate": 1.9521912350597613e-05,
"loss": 0.401,
"step": 2190
},
{
"epoch": 2.81,
"learning_rate": 1.9464997154240183e-05,
"loss": 0.261,
"step": 2195
},
{
"epoch": 2.82,
"learning_rate": 1.9408081957882756e-05,
"loss": 0.3531,
"step": 2200
},
{
"epoch": 2.82,
"learning_rate": 1.935116676152533e-05,
"loss": 0.3118,
"step": 2205
},
{
"epoch": 2.83,
"learning_rate": 1.92942515651679e-05,
"loss": 0.3498,
"step": 2210
},
{
"epoch": 2.84,
"learning_rate": 1.9237336368810473e-05,
"loss": 0.3738,
"step": 2215
},
{
"epoch": 2.84,
"learning_rate": 1.9180421172453046e-05,
"loss": 0.2844,
"step": 2220
},
{
"epoch": 2.85,
"learning_rate": 1.912350597609562e-05,
"loss": 0.3668,
"step": 2225
},
{
"epoch": 2.86,
"learning_rate": 1.9066590779738193e-05,
"loss": 0.4105,
"step": 2230
},
{
"epoch": 2.86,
"learning_rate": 1.9009675583380763e-05,
"loss": 0.3562,
"step": 2235
},
{
"epoch": 2.87,
"learning_rate": 1.8952760387023336e-05,
"loss": 0.3053,
"step": 2240
},
{
"epoch": 2.87,
"learning_rate": 1.889584519066591e-05,
"loss": 0.3124,
"step": 2245
},
{
"epoch": 2.88,
"learning_rate": 1.8838929994308483e-05,
"loss": 0.3148,
"step": 2250
},
{
"epoch": 2.89,
"learning_rate": 1.8782014797951053e-05,
"loss": 0.2883,
"step": 2255
},
{
"epoch": 2.89,
"learning_rate": 1.8725099601593626e-05,
"loss": 0.3433,
"step": 2260
},
{
"epoch": 2.9,
"learning_rate": 1.86681844052362e-05,
"loss": 0.343,
"step": 2265
},
{
"epoch": 2.91,
"learning_rate": 1.8611269208878773e-05,
"loss": 0.2873,
"step": 2270
},
{
"epoch": 2.91,
"learning_rate": 1.8554354012521346e-05,
"loss": 0.3344,
"step": 2275
},
{
"epoch": 2.92,
"learning_rate": 1.8497438816163916e-05,
"loss": 0.2587,
"step": 2280
},
{
"epoch": 2.93,
"learning_rate": 1.844052361980649e-05,
"loss": 0.3247,
"step": 2285
},
{
"epoch": 2.93,
"learning_rate": 1.8383608423449063e-05,
"loss": 0.281,
"step": 2290
},
{
"epoch": 2.94,
"learning_rate": 1.8326693227091633e-05,
"loss": 0.2981,
"step": 2295
},
{
"epoch": 2.94,
"learning_rate": 1.8269778030734207e-05,
"loss": 0.228,
"step": 2300
},
{
"epoch": 2.95,
"learning_rate": 1.821286283437678e-05,
"loss": 0.3926,
"step": 2305
},
{
"epoch": 2.96,
"learning_rate": 1.8155947638019353e-05,
"loss": 0.2932,
"step": 2310
},
{
"epoch": 2.96,
"learning_rate": 1.8099032441661927e-05,
"loss": 0.364,
"step": 2315
},
{
"epoch": 2.97,
"learning_rate": 1.8042117245304497e-05,
"loss": 0.4113,
"step": 2320
},
{
"epoch": 2.98,
"learning_rate": 1.798520204894707e-05,
"loss": 0.3103,
"step": 2325
},
{
"epoch": 2.98,
"learning_rate": 1.7928286852589643e-05,
"loss": 0.2307,
"step": 2330
},
{
"epoch": 2.99,
"learning_rate": 1.7871371656232213e-05,
"loss": 0.2478,
"step": 2335
},
{
"epoch": 3.0,
"learning_rate": 1.7814456459874787e-05,
"loss": 0.2912,
"step": 2340
},
{
"epoch": 3.0,
"eval_accuracy": 0.861,
"eval_loss": 0.4452311098575592,
"eval_runtime": 61.2046,
"eval_samples_per_second": 163.386,
"eval_steps_per_second": 10.212,
"step": 2343
},
{
"epoch": 3.0,
"learning_rate": 1.775754126351736e-05,
"loss": 0.2109,
"step": 2345
},
{
"epoch": 3.01,
"learning_rate": 1.7700626067159934e-05,
"loss": 0.2094,
"step": 2350
},
{
"epoch": 3.02,
"learning_rate": 1.7643710870802507e-05,
"loss": 0.2467,
"step": 2355
},
{
"epoch": 3.02,
"learning_rate": 1.7586795674445077e-05,
"loss": 0.2747,
"step": 2360
},
{
"epoch": 3.03,
"learning_rate": 1.752988047808765e-05,
"loss": 0.1656,
"step": 2365
},
{
"epoch": 3.03,
"learning_rate": 1.7472965281730224e-05,
"loss": 0.1659,
"step": 2370
},
{
"epoch": 3.04,
"learning_rate": 1.7416050085372797e-05,
"loss": 0.2871,
"step": 2375
},
{
"epoch": 3.05,
"learning_rate": 1.7359134889015367e-05,
"loss": 0.2369,
"step": 2380
},
{
"epoch": 3.05,
"learning_rate": 1.730221969265794e-05,
"loss": 0.2459,
"step": 2385
},
{
"epoch": 3.06,
"learning_rate": 1.7245304496300514e-05,
"loss": 0.1826,
"step": 2390
},
{
"epoch": 3.07,
"learning_rate": 1.7188389299943087e-05,
"loss": 0.2467,
"step": 2395
},
{
"epoch": 3.07,
"learning_rate": 1.713147410358566e-05,
"loss": 0.2196,
"step": 2400
},
{
"epoch": 3.08,
"learning_rate": 1.707455890722823e-05,
"loss": 0.2427,
"step": 2405
},
{
"epoch": 3.09,
"learning_rate": 1.7017643710870804e-05,
"loss": 0.2632,
"step": 2410
},
{
"epoch": 3.09,
"learning_rate": 1.6960728514513377e-05,
"loss": 0.1924,
"step": 2415
},
{
"epoch": 3.1,
"learning_rate": 1.6903813318155947e-05,
"loss": 0.2471,
"step": 2420
},
{
"epoch": 3.1,
"learning_rate": 1.684689812179852e-05,
"loss": 0.2159,
"step": 2425
},
{
"epoch": 3.11,
"learning_rate": 1.6789982925441094e-05,
"loss": 0.2591,
"step": 2430
},
{
"epoch": 3.12,
"learning_rate": 1.6733067729083667e-05,
"loss": 0.2674,
"step": 2435
},
{
"epoch": 3.12,
"learning_rate": 1.667615253272624e-05,
"loss": 0.2457,
"step": 2440
},
{
"epoch": 3.13,
"learning_rate": 1.661923733636881e-05,
"loss": 0.2419,
"step": 2445
},
{
"epoch": 3.14,
"learning_rate": 1.6562322140011384e-05,
"loss": 0.1977,
"step": 2450
},
{
"epoch": 3.14,
"learning_rate": 1.6505406943653958e-05,
"loss": 0.216,
"step": 2455
},
{
"epoch": 3.15,
"learning_rate": 1.6448491747296528e-05,
"loss": 0.2799,
"step": 2460
},
{
"epoch": 3.16,
"learning_rate": 1.63915765509391e-05,
"loss": 0.1789,
"step": 2465
},
{
"epoch": 3.16,
"learning_rate": 1.6334661354581674e-05,
"loss": 0.2677,
"step": 2470
},
{
"epoch": 3.17,
"learning_rate": 1.6277746158224248e-05,
"loss": 0.1893,
"step": 2475
},
{
"epoch": 3.18,
"learning_rate": 1.622083096186682e-05,
"loss": 0.1756,
"step": 2480
},
{
"epoch": 3.18,
"learning_rate": 1.616391576550939e-05,
"loss": 0.2038,
"step": 2485
},
{
"epoch": 3.19,
"learning_rate": 1.6107000569151964e-05,
"loss": 0.1776,
"step": 2490
},
{
"epoch": 3.19,
"learning_rate": 1.6050085372794538e-05,
"loss": 0.3071,
"step": 2495
},
{
"epoch": 3.2,
"learning_rate": 1.5993170176437108e-05,
"loss": 0.2819,
"step": 2500
},
{
"epoch": 3.21,
"learning_rate": 1.593625498007968e-05,
"loss": 0.2425,
"step": 2505
},
{
"epoch": 3.21,
"learning_rate": 1.5879339783722255e-05,
"loss": 0.2611,
"step": 2510
},
{
"epoch": 3.22,
"learning_rate": 1.5822424587364828e-05,
"loss": 0.1911,
"step": 2515
},
{
"epoch": 3.23,
"learning_rate": 1.57655093910074e-05,
"loss": 0.2089,
"step": 2520
},
{
"epoch": 3.23,
"learning_rate": 1.5708594194649975e-05,
"loss": 0.2004,
"step": 2525
},
{
"epoch": 3.24,
"learning_rate": 1.5651678998292545e-05,
"loss": 0.2162,
"step": 2530
},
{
"epoch": 3.25,
"learning_rate": 1.5594763801935118e-05,
"loss": 0.2117,
"step": 2535
},
{
"epoch": 3.25,
"learning_rate": 1.553784860557769e-05,
"loss": 0.2401,
"step": 2540
},
{
"epoch": 3.26,
"learning_rate": 1.548093340922026e-05,
"loss": 0.2056,
"step": 2545
},
{
"epoch": 3.26,
"learning_rate": 1.5424018212862835e-05,
"loss": 0.23,
"step": 2550
},
{
"epoch": 3.27,
"learning_rate": 1.5367103016505408e-05,
"loss": 0.2444,
"step": 2555
},
{
"epoch": 3.28,
"learning_rate": 1.531018782014798e-05,
"loss": 0.3034,
"step": 2560
},
{
"epoch": 3.28,
"learning_rate": 1.5253272623790553e-05,
"loss": 0.1683,
"step": 2565
},
{
"epoch": 3.29,
"learning_rate": 1.5196357427433127e-05,
"loss": 0.1557,
"step": 2570
},
{
"epoch": 3.3,
"learning_rate": 1.5139442231075698e-05,
"loss": 0.1504,
"step": 2575
},
{
"epoch": 3.3,
"learning_rate": 1.508252703471827e-05,
"loss": 0.2048,
"step": 2580
},
{
"epoch": 3.31,
"learning_rate": 1.5025611838360843e-05,
"loss": 0.2511,
"step": 2585
},
{
"epoch": 3.32,
"learning_rate": 1.4968696642003415e-05,
"loss": 0.204,
"step": 2590
},
{
"epoch": 3.32,
"learning_rate": 1.491178144564599e-05,
"loss": 0.2699,
"step": 2595
},
{
"epoch": 3.33,
"learning_rate": 1.4854866249288562e-05,
"loss": 0.1463,
"step": 2600
},
{
"epoch": 3.34,
"learning_rate": 1.4797951052931133e-05,
"loss": 0.1645,
"step": 2605
},
{
"epoch": 3.34,
"learning_rate": 1.4741035856573707e-05,
"loss": 0.1908,
"step": 2610
},
{
"epoch": 3.35,
"learning_rate": 1.4684120660216279e-05,
"loss": 0.2191,
"step": 2615
},
{
"epoch": 3.35,
"learning_rate": 1.4627205463858852e-05,
"loss": 0.2372,
"step": 2620
},
{
"epoch": 3.36,
"learning_rate": 1.4570290267501424e-05,
"loss": 0.2423,
"step": 2625
},
{
"epoch": 3.37,
"learning_rate": 1.4513375071143995e-05,
"loss": 0.141,
"step": 2630
},
{
"epoch": 3.37,
"learning_rate": 1.4456459874786569e-05,
"loss": 0.2345,
"step": 2635
},
{
"epoch": 3.38,
"learning_rate": 1.4399544678429142e-05,
"loss": 0.2508,
"step": 2640
},
{
"epoch": 3.39,
"learning_rate": 1.4342629482071715e-05,
"loss": 0.1987,
"step": 2645
},
{
"epoch": 3.39,
"learning_rate": 1.4285714285714287e-05,
"loss": 0.1662,
"step": 2650
},
{
"epoch": 3.4,
"learning_rate": 1.4228799089356859e-05,
"loss": 0.1729,
"step": 2655
},
{
"epoch": 3.41,
"learning_rate": 1.4171883892999432e-05,
"loss": 0.2623,
"step": 2660
},
{
"epoch": 3.41,
"learning_rate": 1.4114968696642004e-05,
"loss": 0.2242,
"step": 2665
},
{
"epoch": 3.42,
"learning_rate": 1.4058053500284576e-05,
"loss": 0.1906,
"step": 2670
},
{
"epoch": 3.42,
"learning_rate": 1.4001138303927149e-05,
"loss": 0.243,
"step": 2675
},
{
"epoch": 3.43,
"learning_rate": 1.3944223107569724e-05,
"loss": 0.2251,
"step": 2680
},
{
"epoch": 3.44,
"learning_rate": 1.3887307911212296e-05,
"loss": 0.2056,
"step": 2685
},
{
"epoch": 3.44,
"learning_rate": 1.3830392714854867e-05,
"loss": 0.2007,
"step": 2690
},
{
"epoch": 3.45,
"learning_rate": 1.377347751849744e-05,
"loss": 0.2273,
"step": 2695
},
{
"epoch": 3.46,
"learning_rate": 1.3716562322140012e-05,
"loss": 0.2652,
"step": 2700
},
{
"epoch": 3.46,
"learning_rate": 1.3659647125782584e-05,
"loss": 0.1765,
"step": 2705
},
{
"epoch": 3.47,
"learning_rate": 1.3602731929425157e-05,
"loss": 0.2289,
"step": 2710
},
{
"epoch": 3.48,
"learning_rate": 1.354581673306773e-05,
"loss": 0.2561,
"step": 2715
},
{
"epoch": 3.48,
"learning_rate": 1.3488901536710304e-05,
"loss": 0.2211,
"step": 2720
},
{
"epoch": 3.49,
"learning_rate": 1.3431986340352876e-05,
"loss": 0.1894,
"step": 2725
},
{
"epoch": 3.5,
"learning_rate": 1.3375071143995448e-05,
"loss": 0.1795,
"step": 2730
},
{
"epoch": 3.5,
"learning_rate": 1.3318155947638021e-05,
"loss": 0.1967,
"step": 2735
},
{
"epoch": 3.51,
"learning_rate": 1.3261240751280593e-05,
"loss": 0.2562,
"step": 2740
},
{
"epoch": 3.51,
"learning_rate": 1.3204325554923166e-05,
"loss": 0.2178,
"step": 2745
},
{
"epoch": 3.52,
"learning_rate": 1.3147410358565738e-05,
"loss": 0.1908,
"step": 2750
},
{
"epoch": 3.53,
"learning_rate": 1.309049516220831e-05,
"loss": 0.1789,
"step": 2755
},
{
"epoch": 3.53,
"learning_rate": 1.3033579965850883e-05,
"loss": 0.2742,
"step": 2760
},
{
"epoch": 3.54,
"learning_rate": 1.2976664769493456e-05,
"loss": 0.2605,
"step": 2765
},
{
"epoch": 3.55,
"learning_rate": 1.291974957313603e-05,
"loss": 0.2138,
"step": 2770
},
{
"epoch": 3.55,
"learning_rate": 1.2862834376778601e-05,
"loss": 0.1998,
"step": 2775
},
{
"epoch": 3.56,
"learning_rate": 1.2805919180421173e-05,
"loss": 0.2454,
"step": 2780
},
{
"epoch": 3.57,
"learning_rate": 1.2749003984063746e-05,
"loss": 0.2261,
"step": 2785
},
{
"epoch": 3.57,
"learning_rate": 1.2692088787706318e-05,
"loss": 0.1907,
"step": 2790
},
{
"epoch": 3.58,
"learning_rate": 1.263517359134889e-05,
"loss": 0.1849,
"step": 2795
},
{
"epoch": 3.58,
"learning_rate": 1.2578258394991463e-05,
"loss": 0.2552,
"step": 2800
},
{
"epoch": 3.59,
"learning_rate": 1.2521343198634036e-05,
"loss": 0.186,
"step": 2805
},
{
"epoch": 3.6,
"learning_rate": 1.246442800227661e-05,
"loss": 0.1753,
"step": 2810
},
{
"epoch": 3.6,
"learning_rate": 1.2407512805919181e-05,
"loss": 0.188,
"step": 2815
},
{
"epoch": 3.61,
"learning_rate": 1.2350597609561755e-05,
"loss": 0.2063,
"step": 2820
},
{
"epoch": 3.62,
"learning_rate": 1.2293682413204327e-05,
"loss": 0.1565,
"step": 2825
},
{
"epoch": 3.62,
"learning_rate": 1.2236767216846898e-05,
"loss": 0.185,
"step": 2830
},
{
"epoch": 3.63,
"learning_rate": 1.2179852020489472e-05,
"loss": 0.2167,
"step": 2835
},
{
"epoch": 3.64,
"learning_rate": 1.2122936824132043e-05,
"loss": 0.1588,
"step": 2840
},
{
"epoch": 3.64,
"learning_rate": 1.2066021627774618e-05,
"loss": 0.2709,
"step": 2845
},
{
"epoch": 3.65,
"learning_rate": 1.200910643141719e-05,
"loss": 0.2097,
"step": 2850
},
{
"epoch": 3.66,
"learning_rate": 1.1952191235059762e-05,
"loss": 0.1697,
"step": 2855
},
{
"epoch": 3.66,
"learning_rate": 1.1895276038702335e-05,
"loss": 0.2032,
"step": 2860
},
{
"epoch": 3.67,
"learning_rate": 1.1838360842344907e-05,
"loss": 0.2468,
"step": 2865
},
{
"epoch": 3.67,
"learning_rate": 1.1781445645987478e-05,
"loss": 0.1398,
"step": 2870
},
{
"epoch": 3.68,
"learning_rate": 1.1724530449630052e-05,
"loss": 0.2453,
"step": 2875
},
{
"epoch": 3.69,
"learning_rate": 1.1667615253272624e-05,
"loss": 0.2397,
"step": 2880
},
{
"epoch": 3.69,
"learning_rate": 1.1610700056915199e-05,
"loss": 0.1835,
"step": 2885
},
{
"epoch": 3.7,
"learning_rate": 1.155378486055777e-05,
"loss": 0.2497,
"step": 2890
},
{
"epoch": 3.71,
"learning_rate": 1.1496869664200344e-05,
"loss": 0.1499,
"step": 2895
},
{
"epoch": 3.71,
"learning_rate": 1.1439954467842915e-05,
"loss": 0.2455,
"step": 2900
},
{
"epoch": 3.72,
"learning_rate": 1.1383039271485487e-05,
"loss": 0.2016,
"step": 2905
},
{
"epoch": 3.73,
"learning_rate": 1.132612407512806e-05,
"loss": 0.2249,
"step": 2910
},
{
"epoch": 3.73,
"learning_rate": 1.1269208878770632e-05,
"loss": 0.1286,
"step": 2915
},
{
"epoch": 3.74,
"learning_rate": 1.1212293682413204e-05,
"loss": 0.2297,
"step": 2920
},
{
"epoch": 3.74,
"learning_rate": 1.1155378486055777e-05,
"loss": 0.1435,
"step": 2925
},
{
"epoch": 3.75,
"learning_rate": 1.109846328969835e-05,
"loss": 0.1694,
"step": 2930
},
{
"epoch": 3.76,
"learning_rate": 1.1041548093340924e-05,
"loss": 0.2167,
"step": 2935
},
{
"epoch": 3.76,
"learning_rate": 1.0984632896983496e-05,
"loss": 0.1979,
"step": 2940
},
{
"epoch": 3.77,
"learning_rate": 1.0927717700626069e-05,
"loss": 0.1548,
"step": 2945
},
{
"epoch": 3.78,
"learning_rate": 1.087080250426864e-05,
"loss": 0.2188,
"step": 2950
},
{
"epoch": 3.78,
"learning_rate": 1.0813887307911212e-05,
"loss": 0.2313,
"step": 2955
},
{
"epoch": 3.79,
"learning_rate": 1.0756972111553786e-05,
"loss": 0.2211,
"step": 2960
},
{
"epoch": 3.8,
"learning_rate": 1.0700056915196357e-05,
"loss": 0.1612,
"step": 2965
},
{
"epoch": 3.8,
"learning_rate": 1.0643141718838932e-05,
"loss": 0.2125,
"step": 2970
},
{
"epoch": 3.81,
"learning_rate": 1.0586226522481504e-05,
"loss": 0.206,
"step": 2975
},
{
"epoch": 3.82,
"learning_rate": 1.0529311326124076e-05,
"loss": 0.2112,
"step": 2980
},
{
"epoch": 3.82,
"learning_rate": 1.047239612976665e-05,
"loss": 0.1762,
"step": 2985
},
{
"epoch": 3.83,
"learning_rate": 1.0415480933409221e-05,
"loss": 0.169,
"step": 2990
},
{
"epoch": 3.83,
"learning_rate": 1.0358565737051793e-05,
"loss": 0.2013,
"step": 2995
},
{
"epoch": 3.84,
"learning_rate": 1.0301650540694366e-05,
"loss": 0.1734,
"step": 3000
},
{
"epoch": 3.85,
"learning_rate": 1.0244735344336938e-05,
"loss": 0.215,
"step": 3005
},
{
"epoch": 3.85,
"learning_rate": 1.0187820147979513e-05,
"loss": 0.2166,
"step": 3010
},
{
"epoch": 3.86,
"learning_rate": 1.0130904951622084e-05,
"loss": 0.2166,
"step": 3015
},
{
"epoch": 3.87,
"learning_rate": 1.0073989755264658e-05,
"loss": 0.1942,
"step": 3020
},
{
"epoch": 3.87,
"learning_rate": 1.001707455890723e-05,
"loss": 0.1952,
"step": 3025
},
{
"epoch": 3.88,
"learning_rate": 9.960159362549801e-06,
"loss": 0.1974,
"step": 3030
},
{
"epoch": 3.89,
"learning_rate": 9.903244166192375e-06,
"loss": 0.2231,
"step": 3035
},
{
"epoch": 3.89,
"learning_rate": 9.846328969834946e-06,
"loss": 0.2053,
"step": 3040
},
{
"epoch": 3.9,
"learning_rate": 9.78941377347752e-06,
"loss": 0.1894,
"step": 3045
},
{
"epoch": 3.9,
"learning_rate": 9.732498577120091e-06,
"loss": 0.2454,
"step": 3050
},
{
"epoch": 3.91,
"learning_rate": 9.675583380762665e-06,
"loss": 0.1853,
"step": 3055
},
{
"epoch": 3.92,
"learning_rate": 9.618668184405236e-06,
"loss": 0.2468,
"step": 3060
},
{
"epoch": 3.92,
"learning_rate": 9.56175298804781e-06,
"loss": 0.1915,
"step": 3065
},
{
"epoch": 3.93,
"learning_rate": 9.504837791690381e-06,
"loss": 0.2251,
"step": 3070
},
{
"epoch": 3.94,
"learning_rate": 9.447922595332955e-06,
"loss": 0.1638,
"step": 3075
},
{
"epoch": 3.94,
"learning_rate": 9.391007398975526e-06,
"loss": 0.16,
"step": 3080
},
{
"epoch": 3.95,
"learning_rate": 9.3340922026181e-06,
"loss": 0.1759,
"step": 3085
},
{
"epoch": 3.96,
"learning_rate": 9.277177006260673e-06,
"loss": 0.2079,
"step": 3090
},
{
"epoch": 3.96,
"learning_rate": 9.220261809903245e-06,
"loss": 0.22,
"step": 3095
},
{
"epoch": 3.97,
"learning_rate": 9.163346613545817e-06,
"loss": 0.2352,
"step": 3100
},
{
"epoch": 3.98,
"learning_rate": 9.10643141718839e-06,
"loss": 0.1975,
"step": 3105
},
{
"epoch": 3.98,
"learning_rate": 9.049516220830963e-06,
"loss": 0.2027,
"step": 3110
},
{
"epoch": 3.99,
"learning_rate": 8.992601024473535e-06,
"loss": 0.1782,
"step": 3115
},
{
"epoch": 3.99,
"learning_rate": 8.935685828116107e-06,
"loss": 0.2234,
"step": 3120
},
{
"epoch": 4.0,
"eval_accuracy": 0.8679,
"eval_loss": 0.43295106291770935,
"eval_runtime": 60.931,
"eval_samples_per_second": 164.12,
"eval_steps_per_second": 10.257,
"step": 3124
},
{
"epoch": 4.0,
"learning_rate": 8.87877063175868e-06,
"loss": 0.1934,
"step": 3125
},
{
"epoch": 4.01,
"learning_rate": 8.821855435401253e-06,
"loss": 0.1317,
"step": 3130
},
{
"epoch": 4.01,
"learning_rate": 8.764940239043825e-06,
"loss": 0.2049,
"step": 3135
},
{
"epoch": 4.02,
"learning_rate": 8.708025042686399e-06,
"loss": 0.1348,
"step": 3140
},
{
"epoch": 4.03,
"learning_rate": 8.65110984632897e-06,
"loss": 0.1759,
"step": 3145
},
{
"epoch": 4.03,
"learning_rate": 8.594194649971544e-06,
"loss": 0.1538,
"step": 3150
},
{
"epoch": 4.04,
"learning_rate": 8.537279453614115e-06,
"loss": 0.1096,
"step": 3155
},
{
"epoch": 4.05,
"learning_rate": 8.480364257256689e-06,
"loss": 0.1689,
"step": 3160
},
{
"epoch": 4.05,
"learning_rate": 8.42344906089926e-06,
"loss": 0.1647,
"step": 3165
},
{
"epoch": 4.06,
"learning_rate": 8.366533864541834e-06,
"loss": 0.1881,
"step": 3170
},
{
"epoch": 4.07,
"learning_rate": 8.309618668184405e-06,
"loss": 0.1345,
"step": 3175
},
{
"epoch": 4.07,
"learning_rate": 8.252703471826979e-06,
"loss": 0.134,
"step": 3180
},
{
"epoch": 4.08,
"learning_rate": 8.19578827546955e-06,
"loss": 0.1413,
"step": 3185
},
{
"epoch": 4.08,
"learning_rate": 8.138873079112124e-06,
"loss": 0.1382,
"step": 3190
},
{
"epoch": 4.09,
"learning_rate": 8.081957882754696e-06,
"loss": 0.1666,
"step": 3195
},
{
"epoch": 4.1,
"learning_rate": 8.025042686397269e-06,
"loss": 0.1115,
"step": 3200
},
{
"epoch": 4.1,
"learning_rate": 7.96812749003984e-06,
"loss": 0.1575,
"step": 3205
},
{
"epoch": 4.11,
"learning_rate": 7.911212293682414e-06,
"loss": 0.1469,
"step": 3210
},
{
"epoch": 4.12,
"learning_rate": 7.854297097324987e-06,
"loss": 0.1367,
"step": 3215
},
{
"epoch": 4.12,
"learning_rate": 7.797381900967559e-06,
"loss": 0.1432,
"step": 3220
},
{
"epoch": 4.13,
"learning_rate": 7.74046670461013e-06,
"loss": 0.1375,
"step": 3225
},
{
"epoch": 4.14,
"learning_rate": 7.683551508252704e-06,
"loss": 0.1574,
"step": 3230
},
{
"epoch": 4.14,
"learning_rate": 7.626636311895277e-06,
"loss": 0.1289,
"step": 3235
},
{
"epoch": 4.15,
"learning_rate": 7.569721115537849e-06,
"loss": 0.1744,
"step": 3240
},
{
"epoch": 4.15,
"learning_rate": 7.512805919180422e-06,
"loss": 0.1705,
"step": 3245
},
{
"epoch": 4.16,
"learning_rate": 7.455890722822995e-06,
"loss": 0.1719,
"step": 3250
},
{
"epoch": 4.17,
"learning_rate": 7.398975526465567e-06,
"loss": 0.1454,
"step": 3255
},
{
"epoch": 4.17,
"learning_rate": 7.342060330108139e-06,
"loss": 0.1038,
"step": 3260
},
{
"epoch": 4.18,
"learning_rate": 7.285145133750712e-06,
"loss": 0.1544,
"step": 3265
},
{
"epoch": 4.19,
"learning_rate": 7.228229937393284e-06,
"loss": 0.1299,
"step": 3270
},
{
"epoch": 4.19,
"learning_rate": 7.171314741035858e-06,
"loss": 0.1465,
"step": 3275
},
{
"epoch": 4.2,
"learning_rate": 7.114399544678429e-06,
"loss": 0.1236,
"step": 3280
},
{
"epoch": 4.21,
"learning_rate": 7.057484348321002e-06,
"loss": 0.1329,
"step": 3285
},
{
"epoch": 4.21,
"learning_rate": 7.0005691519635745e-06,
"loss": 0.1716,
"step": 3290
},
{
"epoch": 4.22,
"learning_rate": 6.943653955606148e-06,
"loss": 0.1225,
"step": 3295
},
{
"epoch": 4.23,
"learning_rate": 6.88673875924872e-06,
"loss": 0.1124,
"step": 3300
},
{
"epoch": 4.23,
"learning_rate": 6.829823562891292e-06,
"loss": 0.1425,
"step": 3305
},
{
"epoch": 4.24,
"learning_rate": 6.772908366533865e-06,
"loss": 0.1209,
"step": 3310
},
{
"epoch": 4.24,
"learning_rate": 6.715993170176438e-06,
"loss": 0.1293,
"step": 3315
},
{
"epoch": 4.25,
"learning_rate": 6.6590779738190105e-06,
"loss": 0.1592,
"step": 3320
},
{
"epoch": 4.26,
"learning_rate": 6.602162777461583e-06,
"loss": 0.1525,
"step": 3325
},
{
"epoch": 4.26,
"learning_rate": 6.545247581104155e-06,
"loss": 0.1993,
"step": 3330
},
{
"epoch": 4.27,
"learning_rate": 6.488332384746728e-06,
"loss": 0.1312,
"step": 3335
},
{
"epoch": 4.28,
"learning_rate": 6.431417188389301e-06,
"loss": 0.1318,
"step": 3340
},
{
"epoch": 4.28,
"learning_rate": 6.374501992031873e-06,
"loss": 0.1576,
"step": 3345
},
{
"epoch": 4.29,
"learning_rate": 6.317586795674445e-06,
"loss": 0.1417,
"step": 3350
},
{
"epoch": 4.3,
"learning_rate": 6.260671599317018e-06,
"loss": 0.1696,
"step": 3355
},
{
"epoch": 4.3,
"learning_rate": 6.203756402959591e-06,
"loss": 0.1711,
"step": 3360
},
{
"epoch": 4.31,
"learning_rate": 6.146841206602163e-06,
"loss": 0.187,
"step": 3365
},
{
"epoch": 4.31,
"learning_rate": 6.089926010244736e-06,
"loss": 0.1165,
"step": 3370
},
{
"epoch": 4.32,
"learning_rate": 6.033010813887309e-06,
"loss": 0.1282,
"step": 3375
},
{
"epoch": 4.33,
"learning_rate": 5.976095617529881e-06,
"loss": 0.1528,
"step": 3380
},
{
"epoch": 4.33,
"learning_rate": 5.919180421172453e-06,
"loss": 0.1477,
"step": 3385
},
{
"epoch": 4.34,
"learning_rate": 5.862265224815026e-06,
"loss": 0.1666,
"step": 3390
},
{
"epoch": 4.35,
"learning_rate": 5.805350028457599e-06,
"loss": 0.1881,
"step": 3395
},
{
"epoch": 4.35,
"learning_rate": 5.748434832100172e-06,
"loss": 0.1377,
"step": 3400
},
{
"epoch": 4.36,
"learning_rate": 5.6915196357427435e-06,
"loss": 0.16,
"step": 3405
},
{
"epoch": 4.37,
"learning_rate": 5.634604439385316e-06,
"loss": 0.1488,
"step": 3410
},
{
"epoch": 4.37,
"learning_rate": 5.577689243027889e-06,
"loss": 0.143,
"step": 3415
},
{
"epoch": 4.38,
"learning_rate": 5.520774046670462e-06,
"loss": 0.1414,
"step": 3420
},
{
"epoch": 4.39,
"learning_rate": 5.4638588503130345e-06,
"loss": 0.1979,
"step": 3425
},
{
"epoch": 4.39,
"learning_rate": 5.406943653955606e-06,
"loss": 0.1419,
"step": 3430
},
{
"epoch": 4.4,
"learning_rate": 5.350028457598179e-06,
"loss": 0.1216,
"step": 3435
},
{
"epoch": 4.4,
"learning_rate": 5.293113261240752e-06,
"loss": 0.1433,
"step": 3440
},
{
"epoch": 4.41,
"learning_rate": 5.236198064883325e-06,
"loss": 0.1615,
"step": 3445
},
{
"epoch": 4.42,
"learning_rate": 5.179282868525896e-06,
"loss": 0.1652,
"step": 3450
},
{
"epoch": 4.42,
"learning_rate": 5.122367672168469e-06,
"loss": 0.2001,
"step": 3455
},
{
"epoch": 4.43,
"learning_rate": 5.065452475811042e-06,
"loss": 0.1397,
"step": 3460
},
{
"epoch": 4.44,
"learning_rate": 5.008537279453615e-06,
"loss": 0.1599,
"step": 3465
},
{
"epoch": 4.44,
"learning_rate": 4.951622083096187e-06,
"loss": 0.1167,
"step": 3470
},
{
"epoch": 4.45,
"learning_rate": 4.89470688673876e-06,
"loss": 0.1473,
"step": 3475
},
{
"epoch": 4.46,
"learning_rate": 4.837791690381332e-06,
"loss": 0.1486,
"step": 3480
},
{
"epoch": 4.46,
"learning_rate": 4.780876494023905e-06,
"loss": 0.1604,
"step": 3485
},
{
"epoch": 4.47,
"learning_rate": 4.723961297666477e-06,
"loss": 0.1364,
"step": 3490
},
{
"epoch": 4.47,
"learning_rate": 4.66704610130905e-06,
"loss": 0.183,
"step": 3495
},
{
"epoch": 4.48,
"learning_rate": 4.6101309049516225e-06,
"loss": 0.1803,
"step": 3500
},
{
"epoch": 4.49,
"learning_rate": 4.553215708594195e-06,
"loss": 0.1405,
"step": 3505
},
{
"epoch": 4.49,
"learning_rate": 4.4963005122367675e-06,
"loss": 0.1436,
"step": 3510
},
{
"epoch": 4.5,
"learning_rate": 4.43938531587934e-06,
"loss": 0.1442,
"step": 3515
},
{
"epoch": 4.51,
"learning_rate": 4.382470119521913e-06,
"loss": 0.0979,
"step": 3520
},
{
"epoch": 4.51,
"learning_rate": 4.325554923164485e-06,
"loss": 0.1218,
"step": 3525
},
{
"epoch": 4.52,
"learning_rate": 4.268639726807058e-06,
"loss": 0.1965,
"step": 3530
},
{
"epoch": 4.53,
"learning_rate": 4.21172453044963e-06,
"loss": 0.161,
"step": 3535
},
{
"epoch": 4.53,
"learning_rate": 4.154809334092203e-06,
"loss": 0.1378,
"step": 3540
},
{
"epoch": 4.54,
"learning_rate": 4.097894137734775e-06,
"loss": 0.1333,
"step": 3545
},
{
"epoch": 4.55,
"learning_rate": 4.040978941377348e-06,
"loss": 0.137,
"step": 3550
},
{
"epoch": 4.55,
"learning_rate": 3.98406374501992e-06,
"loss": 0.1652,
"step": 3555
},
{
"epoch": 4.56,
"learning_rate": 3.927148548662494e-06,
"loss": 0.1655,
"step": 3560
},
{
"epoch": 4.56,
"learning_rate": 3.870233352305065e-06,
"loss": 0.1791,
"step": 3565
},
{
"epoch": 4.57,
"learning_rate": 3.8133181559476383e-06,
"loss": 0.128,
"step": 3570
},
{
"epoch": 4.58,
"learning_rate": 3.756402959590211e-06,
"loss": 0.1567,
"step": 3575
},
{
"epoch": 4.58,
"learning_rate": 3.6994877632327834e-06,
"loss": 0.1564,
"step": 3580
},
{
"epoch": 4.59,
"learning_rate": 3.642572566875356e-06,
"loss": 0.1369,
"step": 3585
},
{
"epoch": 4.6,
"learning_rate": 3.585657370517929e-06,
"loss": 0.176,
"step": 3590
},
{
"epoch": 4.6,
"learning_rate": 3.528742174160501e-06,
"loss": 0.1456,
"step": 3595
},
{
"epoch": 4.61,
"learning_rate": 3.471826977803074e-06,
"loss": 0.1346,
"step": 3600
},
{
"epoch": 4.62,
"learning_rate": 3.414911781445646e-06,
"loss": 0.1565,
"step": 3605
},
{
"epoch": 4.62,
"learning_rate": 3.357996585088219e-06,
"loss": 0.1792,
"step": 3610
},
{
"epoch": 4.63,
"learning_rate": 3.3010813887307915e-06,
"loss": 0.1411,
"step": 3615
},
{
"epoch": 4.63,
"learning_rate": 3.244166192373364e-06,
"loss": 0.1847,
"step": 3620
},
{
"epoch": 4.64,
"learning_rate": 3.1872509960159366e-06,
"loss": 0.19,
"step": 3625
},
{
"epoch": 4.65,
"learning_rate": 3.130335799658509e-06,
"loss": 0.1474,
"step": 3630
},
{
"epoch": 4.65,
"learning_rate": 3.0734206033010816e-06,
"loss": 0.1869,
"step": 3635
},
{
"epoch": 4.66,
"learning_rate": 3.0165054069436546e-06,
"loss": 0.1128,
"step": 3640
},
{
"epoch": 4.67,
"learning_rate": 2.9595902105862267e-06,
"loss": 0.1203,
"step": 3645
},
{
"epoch": 4.67,
"learning_rate": 2.9026750142287997e-06,
"loss": 0.1286,
"step": 3650
},
{
"epoch": 4.68,
"learning_rate": 2.8457598178713718e-06,
"loss": 0.12,
"step": 3655
},
{
"epoch": 4.69,
"learning_rate": 2.7888446215139443e-06,
"loss": 0.095,
"step": 3660
},
{
"epoch": 4.69,
"learning_rate": 2.7319294251565172e-06,
"loss": 0.1002,
"step": 3665
},
{
"epoch": 4.7,
"learning_rate": 2.6750142287990894e-06,
"loss": 0.1256,
"step": 3670
},
{
"epoch": 4.71,
"learning_rate": 2.6180990324416623e-06,
"loss": 0.1148,
"step": 3675
},
{
"epoch": 4.71,
"learning_rate": 2.5611838360842344e-06,
"loss": 0.1646,
"step": 3680
},
{
"epoch": 4.72,
"learning_rate": 2.5042686397268074e-06,
"loss": 0.1083,
"step": 3685
},
{
"epoch": 4.72,
"learning_rate": 2.44735344336938e-06,
"loss": 0.1251,
"step": 3690
},
{
"epoch": 4.73,
"learning_rate": 2.3904382470119524e-06,
"loss": 0.1242,
"step": 3695
},
{
"epoch": 4.74,
"learning_rate": 2.333523050654525e-06,
"loss": 0.1671,
"step": 3700
},
{
"epoch": 4.74,
"learning_rate": 2.2766078542970975e-06,
"loss": 0.1442,
"step": 3705
},
{
"epoch": 4.75,
"learning_rate": 2.21969265793967e-06,
"loss": 0.1694,
"step": 3710
},
{
"epoch": 4.76,
"learning_rate": 2.1627774615822426e-06,
"loss": 0.1632,
"step": 3715
},
{
"epoch": 4.76,
"learning_rate": 2.105862265224815e-06,
"loss": 0.1738,
"step": 3720
},
{
"epoch": 4.77,
"learning_rate": 2.0489470688673876e-06,
"loss": 0.1062,
"step": 3725
},
{
"epoch": 4.78,
"learning_rate": 1.99203187250996e-06,
"loss": 0.1395,
"step": 3730
},
{
"epoch": 4.78,
"learning_rate": 1.9351166761525327e-06,
"loss": 0.1321,
"step": 3735
},
{
"epoch": 4.79,
"learning_rate": 1.8782014797951054e-06,
"loss": 0.1251,
"step": 3740
},
{
"epoch": 4.79,
"learning_rate": 1.821286283437678e-06,
"loss": 0.1499,
"step": 3745
},
{
"epoch": 4.8,
"learning_rate": 1.7643710870802505e-06,
"loss": 0.1338,
"step": 3750
},
{
"epoch": 4.81,
"learning_rate": 1.707455890722823e-06,
"loss": 0.1873,
"step": 3755
},
{
"epoch": 4.81,
"learning_rate": 1.6505406943653958e-06,
"loss": 0.1352,
"step": 3760
},
{
"epoch": 4.82,
"learning_rate": 1.5936254980079683e-06,
"loss": 0.1673,
"step": 3765
},
{
"epoch": 4.83,
"learning_rate": 1.5367103016505408e-06,
"loss": 0.1705,
"step": 3770
},
{
"epoch": 4.83,
"learning_rate": 1.4797951052931133e-06,
"loss": 0.1145,
"step": 3775
},
{
"epoch": 4.84,
"learning_rate": 1.4228799089356859e-06,
"loss": 0.0999,
"step": 3780
},
{
"epoch": 4.85,
"learning_rate": 1.3659647125782586e-06,
"loss": 0.1012,
"step": 3785
},
{
"epoch": 4.85,
"learning_rate": 1.3090495162208312e-06,
"loss": 0.1677,
"step": 3790
},
{
"epoch": 4.86,
"learning_rate": 1.2521343198634037e-06,
"loss": 0.169,
"step": 3795
},
{
"epoch": 4.87,
"learning_rate": 1.1952191235059762e-06,
"loss": 0.1844,
"step": 3800
},
{
"epoch": 4.87,
"learning_rate": 1.1383039271485487e-06,
"loss": 0.1701,
"step": 3805
},
{
"epoch": 4.88,
"learning_rate": 1.0813887307911213e-06,
"loss": 0.1319,
"step": 3810
},
{
"epoch": 4.88,
"learning_rate": 1.0244735344336938e-06,
"loss": 0.1546,
"step": 3815
},
{
"epoch": 4.89,
"learning_rate": 9.675583380762663e-07,
"loss": 0.1579,
"step": 3820
},
{
"epoch": 4.9,
"learning_rate": 9.10643141718839e-07,
"loss": 0.1308,
"step": 3825
},
{
"epoch": 4.9,
"learning_rate": 8.537279453614115e-07,
"loss": 0.1453,
"step": 3830
},
{
"epoch": 4.91,
"learning_rate": 7.968127490039841e-07,
"loss": 0.169,
"step": 3835
},
{
"epoch": 4.92,
"learning_rate": 7.398975526465567e-07,
"loss": 0.1191,
"step": 3840
},
{
"epoch": 4.92,
"learning_rate": 6.829823562891293e-07,
"loss": 0.152,
"step": 3845
},
{
"epoch": 4.93,
"learning_rate": 6.260671599317018e-07,
"loss": 0.1275,
"step": 3850
},
{
"epoch": 4.94,
"learning_rate": 5.691519635742744e-07,
"loss": 0.1409,
"step": 3855
},
{
"epoch": 4.94,
"learning_rate": 5.122367672168469e-07,
"loss": 0.1332,
"step": 3860
},
{
"epoch": 4.95,
"learning_rate": 4.553215708594195e-07,
"loss": 0.163,
"step": 3865
},
{
"epoch": 4.95,
"learning_rate": 3.9840637450199207e-07,
"loss": 0.1564,
"step": 3870
},
{
"epoch": 4.96,
"learning_rate": 3.4149117814456466e-07,
"loss": 0.2202,
"step": 3875
},
{
"epoch": 4.97,
"learning_rate": 2.845759817871372e-07,
"loss": 0.1769,
"step": 3880
},
{
"epoch": 4.97,
"learning_rate": 2.2766078542970974e-07,
"loss": 0.1343,
"step": 3885
},
{
"epoch": 4.98,
"learning_rate": 1.7074558907228233e-07,
"loss": 0.1481,
"step": 3890
},
{
"epoch": 4.99,
"learning_rate": 1.1383039271485487e-07,
"loss": 0.1591,
"step": 3895
},
{
"epoch": 4.99,
"learning_rate": 5.6915196357427436e-08,
"loss": 0.1983,
"step": 3900
},
{
"epoch": 5.0,
"learning_rate": 0.0,
"loss": 0.121,
"step": 3905
},
{
"epoch": 5.0,
"eval_accuracy": 0.8735,
"eval_loss": 0.42226287722587585,
"eval_runtime": 60.1747,
"eval_samples_per_second": 166.183,
"eval_steps_per_second": 10.386,
"step": 3905
},
{
"epoch": 5.0,
"step": 3905,
"total_flos": 6.230614598311477e+18,
"train_loss": 0.0,
"train_runtime": 0.1997,
"train_samples_per_second": 1252007.126,
"train_steps_per_second": 19556.351
}
],
"max_steps": 3905,
"num_train_epochs": 5,
"total_flos": 6.230614598311477e+18,
"trial_name": null,
"trial_params": null
}