plantdoctor's picture
End of training
01b8844
raw
history blame
145 kB
{
"best_metric": 0.9982930298719772,
"best_model_checkpoint": "swin-tiny-patch4-window7-224-plant-doctor\\checkpoint-11862",
"epoch": 2.9999367768856295,
"global_step": 11862,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.2122999157540016e-07,
"loss": 3.7171,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 8.424599831508003e-07,
"loss": 3.7074,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 1.2636899747262006e-06,
"loss": 3.7295,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 1.6849199663016007e-06,
"loss": 3.7159,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 2.106149957877001e-06,
"loss": 3.7221,
"step": 50
},
{
"epoch": 0.02,
"learning_rate": 2.5273799494524013e-06,
"loss": 3.661,
"step": 60
},
{
"epoch": 0.02,
"learning_rate": 2.9486099410278013e-06,
"loss": 3.643,
"step": 70
},
{
"epoch": 0.02,
"learning_rate": 3.3698399326032013e-06,
"loss": 3.6464,
"step": 80
},
{
"epoch": 0.02,
"learning_rate": 3.7910699241786017e-06,
"loss": 3.6449,
"step": 90
},
{
"epoch": 0.03,
"learning_rate": 4.212299915754002e-06,
"loss": 3.6247,
"step": 100
},
{
"epoch": 0.03,
"learning_rate": 4.633529907329403e-06,
"loss": 3.625,
"step": 110
},
{
"epoch": 0.03,
"learning_rate": 5.054759898904803e-06,
"loss": 3.5884,
"step": 120
},
{
"epoch": 0.03,
"learning_rate": 5.475989890480203e-06,
"loss": 3.5969,
"step": 130
},
{
"epoch": 0.04,
"learning_rate": 5.897219882055603e-06,
"loss": 3.5366,
"step": 140
},
{
"epoch": 0.04,
"learning_rate": 6.318449873631002e-06,
"loss": 3.4728,
"step": 150
},
{
"epoch": 0.04,
"learning_rate": 6.739679865206403e-06,
"loss": 3.4796,
"step": 160
},
{
"epoch": 0.04,
"learning_rate": 7.160909856781803e-06,
"loss": 3.4204,
"step": 170
},
{
"epoch": 0.05,
"learning_rate": 7.5821398483572035e-06,
"loss": 3.4229,
"step": 180
},
{
"epoch": 0.05,
"learning_rate": 8.003369839932604e-06,
"loss": 3.3222,
"step": 190
},
{
"epoch": 0.05,
"learning_rate": 8.424599831508003e-06,
"loss": 3.2725,
"step": 200
},
{
"epoch": 0.05,
"learning_rate": 8.845829823083404e-06,
"loss": 3.1691,
"step": 210
},
{
"epoch": 0.06,
"learning_rate": 9.267059814658805e-06,
"loss": 3.0939,
"step": 220
},
{
"epoch": 0.06,
"learning_rate": 9.688289806234204e-06,
"loss": 3.0481,
"step": 230
},
{
"epoch": 0.06,
"learning_rate": 1.0109519797809605e-05,
"loss": 2.94,
"step": 240
},
{
"epoch": 0.06,
"learning_rate": 1.0530749789385004e-05,
"loss": 2.7748,
"step": 250
},
{
"epoch": 0.07,
"learning_rate": 1.0951979780960405e-05,
"loss": 2.6522,
"step": 260
},
{
"epoch": 0.07,
"learning_rate": 1.1373209772535804e-05,
"loss": 2.444,
"step": 270
},
{
"epoch": 0.07,
"learning_rate": 1.1794439764111205e-05,
"loss": 2.4583,
"step": 280
},
{
"epoch": 0.07,
"learning_rate": 1.2215669755686606e-05,
"loss": 2.168,
"step": 290
},
{
"epoch": 0.08,
"learning_rate": 1.2636899747262004e-05,
"loss": 1.9937,
"step": 300
},
{
"epoch": 0.08,
"learning_rate": 1.3058129738837404e-05,
"loss": 1.9586,
"step": 310
},
{
"epoch": 0.08,
"learning_rate": 1.3479359730412805e-05,
"loss": 1.7108,
"step": 320
},
{
"epoch": 0.08,
"learning_rate": 1.3900589721988206e-05,
"loss": 1.6271,
"step": 330
},
{
"epoch": 0.09,
"learning_rate": 1.4321819713563605e-05,
"loss": 1.4738,
"step": 340
},
{
"epoch": 0.09,
"learning_rate": 1.4743049705139006e-05,
"loss": 1.4872,
"step": 350
},
{
"epoch": 0.09,
"learning_rate": 1.5164279696714407e-05,
"loss": 1.3534,
"step": 360
},
{
"epoch": 0.09,
"learning_rate": 1.5585509688289808e-05,
"loss": 1.2978,
"step": 370
},
{
"epoch": 0.1,
"learning_rate": 1.600673967986521e-05,
"loss": 1.0508,
"step": 380
},
{
"epoch": 0.1,
"learning_rate": 1.6427969671440606e-05,
"loss": 1.0588,
"step": 390
},
{
"epoch": 0.1,
"learning_rate": 1.6849199663016007e-05,
"loss": 1.0644,
"step": 400
},
{
"epoch": 0.1,
"learning_rate": 1.7270429654591408e-05,
"loss": 0.9897,
"step": 410
},
{
"epoch": 0.11,
"learning_rate": 1.769165964616681e-05,
"loss": 0.9515,
"step": 420
},
{
"epoch": 0.11,
"learning_rate": 1.811288963774221e-05,
"loss": 0.7189,
"step": 430
},
{
"epoch": 0.11,
"learning_rate": 1.853411962931761e-05,
"loss": 0.7705,
"step": 440
},
{
"epoch": 0.11,
"learning_rate": 1.8955349620893008e-05,
"loss": 0.7983,
"step": 450
},
{
"epoch": 0.12,
"learning_rate": 1.937657961246841e-05,
"loss": 0.6809,
"step": 460
},
{
"epoch": 0.12,
"learning_rate": 1.979780960404381e-05,
"loss": 0.708,
"step": 470
},
{
"epoch": 0.12,
"learning_rate": 2.021903959561921e-05,
"loss": 0.6949,
"step": 480
},
{
"epoch": 0.12,
"learning_rate": 2.064026958719461e-05,
"loss": 0.688,
"step": 490
},
{
"epoch": 0.13,
"learning_rate": 2.106149957877001e-05,
"loss": 0.6239,
"step": 500
},
{
"epoch": 0.13,
"learning_rate": 2.148272957034541e-05,
"loss": 0.7798,
"step": 510
},
{
"epoch": 0.13,
"learning_rate": 2.190395956192081e-05,
"loss": 0.5758,
"step": 520
},
{
"epoch": 0.13,
"learning_rate": 2.2325189553496208e-05,
"loss": 0.4104,
"step": 530
},
{
"epoch": 0.14,
"learning_rate": 2.274641954507161e-05,
"loss": 0.6505,
"step": 540
},
{
"epoch": 0.14,
"learning_rate": 2.316764953664701e-05,
"loss": 0.6455,
"step": 550
},
{
"epoch": 0.14,
"learning_rate": 2.358887952822241e-05,
"loss": 0.5305,
"step": 560
},
{
"epoch": 0.14,
"learning_rate": 2.401010951979781e-05,
"loss": 0.4867,
"step": 570
},
{
"epoch": 0.15,
"learning_rate": 2.4431339511373212e-05,
"loss": 0.5938,
"step": 580
},
{
"epoch": 0.15,
"learning_rate": 2.485256950294861e-05,
"loss": 0.38,
"step": 590
},
{
"epoch": 0.15,
"learning_rate": 2.5273799494524007e-05,
"loss": 0.3926,
"step": 600
},
{
"epoch": 0.15,
"learning_rate": 2.5695029486099408e-05,
"loss": 0.4111,
"step": 610
},
{
"epoch": 0.16,
"learning_rate": 2.611625947767481e-05,
"loss": 0.4342,
"step": 620
},
{
"epoch": 0.16,
"learning_rate": 2.653748946925021e-05,
"loss": 0.6535,
"step": 630
},
{
"epoch": 0.16,
"learning_rate": 2.695871946082561e-05,
"loss": 0.3456,
"step": 640
},
{
"epoch": 0.16,
"learning_rate": 2.737994945240101e-05,
"loss": 0.3302,
"step": 650
},
{
"epoch": 0.17,
"learning_rate": 2.7801179443976412e-05,
"loss": 0.3567,
"step": 660
},
{
"epoch": 0.17,
"learning_rate": 2.822240943555181e-05,
"loss": 0.3142,
"step": 670
},
{
"epoch": 0.17,
"learning_rate": 2.864363942712721e-05,
"loss": 0.2891,
"step": 680
},
{
"epoch": 0.17,
"learning_rate": 2.906486941870261e-05,
"loss": 0.4647,
"step": 690
},
{
"epoch": 0.18,
"learning_rate": 2.9486099410278012e-05,
"loss": 0.3419,
"step": 700
},
{
"epoch": 0.18,
"learning_rate": 2.9907329401853413e-05,
"loss": 0.4307,
"step": 710
},
{
"epoch": 0.18,
"learning_rate": 3.0328559393428814e-05,
"loss": 0.2871,
"step": 720
},
{
"epoch": 0.18,
"learning_rate": 3.0749789385004215e-05,
"loss": 0.4164,
"step": 730
},
{
"epoch": 0.19,
"learning_rate": 3.1171019376579616e-05,
"loss": 0.3037,
"step": 740
},
{
"epoch": 0.19,
"learning_rate": 3.1592249368155016e-05,
"loss": 0.3321,
"step": 750
},
{
"epoch": 0.19,
"learning_rate": 3.201347935973042e-05,
"loss": 0.3677,
"step": 760
},
{
"epoch": 0.19,
"learning_rate": 3.243470935130581e-05,
"loss": 0.2341,
"step": 770
},
{
"epoch": 0.2,
"learning_rate": 3.285593934288121e-05,
"loss": 0.3474,
"step": 780
},
{
"epoch": 0.2,
"learning_rate": 3.327716933445661e-05,
"loss": 0.3857,
"step": 790
},
{
"epoch": 0.2,
"learning_rate": 3.3698399326032014e-05,
"loss": 0.3984,
"step": 800
},
{
"epoch": 0.2,
"learning_rate": 3.4119629317607415e-05,
"loss": 0.2977,
"step": 810
},
{
"epoch": 0.21,
"learning_rate": 3.4540859309182816e-05,
"loss": 0.2937,
"step": 820
},
{
"epoch": 0.21,
"learning_rate": 3.4962089300758216e-05,
"loss": 0.3365,
"step": 830
},
{
"epoch": 0.21,
"learning_rate": 3.538331929233362e-05,
"loss": 0.2323,
"step": 840
},
{
"epoch": 0.21,
"learning_rate": 3.580454928390902e-05,
"loss": 0.3585,
"step": 850
},
{
"epoch": 0.22,
"learning_rate": 3.622577927548442e-05,
"loss": 0.3864,
"step": 860
},
{
"epoch": 0.22,
"learning_rate": 3.664700926705982e-05,
"loss": 0.3779,
"step": 870
},
{
"epoch": 0.22,
"learning_rate": 3.706823925863522e-05,
"loss": 0.4134,
"step": 880
},
{
"epoch": 0.23,
"learning_rate": 3.748946925021062e-05,
"loss": 0.2422,
"step": 890
},
{
"epoch": 0.23,
"learning_rate": 3.7910699241786016e-05,
"loss": 0.2523,
"step": 900
},
{
"epoch": 0.23,
"learning_rate": 3.8331929233361416e-05,
"loss": 0.326,
"step": 910
},
{
"epoch": 0.23,
"learning_rate": 3.875315922493682e-05,
"loss": 0.2949,
"step": 920
},
{
"epoch": 0.24,
"learning_rate": 3.917438921651222e-05,
"loss": 0.2871,
"step": 930
},
{
"epoch": 0.24,
"learning_rate": 3.959561920808762e-05,
"loss": 0.296,
"step": 940
},
{
"epoch": 0.24,
"learning_rate": 4.001684919966302e-05,
"loss": 0.4151,
"step": 950
},
{
"epoch": 0.24,
"learning_rate": 4.043807919123842e-05,
"loss": 0.2998,
"step": 960
},
{
"epoch": 0.25,
"learning_rate": 4.085930918281382e-05,
"loss": 0.3769,
"step": 970
},
{
"epoch": 0.25,
"learning_rate": 4.128053917438922e-05,
"loss": 0.2303,
"step": 980
},
{
"epoch": 0.25,
"learning_rate": 4.1701769165964616e-05,
"loss": 0.2688,
"step": 990
},
{
"epoch": 0.25,
"learning_rate": 4.212299915754002e-05,
"loss": 0.2908,
"step": 1000
},
{
"epoch": 0.26,
"learning_rate": 4.254422914911542e-05,
"loss": 0.2191,
"step": 1010
},
{
"epoch": 0.26,
"learning_rate": 4.296545914069082e-05,
"loss": 0.2606,
"step": 1020
},
{
"epoch": 0.26,
"learning_rate": 4.338668913226622e-05,
"loss": 0.2882,
"step": 1030
},
{
"epoch": 0.26,
"learning_rate": 4.380791912384162e-05,
"loss": 0.3017,
"step": 1040
},
{
"epoch": 0.27,
"learning_rate": 4.4229149115417015e-05,
"loss": 0.3311,
"step": 1050
},
{
"epoch": 0.27,
"learning_rate": 4.4650379106992416e-05,
"loss": 0.3829,
"step": 1060
},
{
"epoch": 0.27,
"learning_rate": 4.5071609098567817e-05,
"loss": 0.2764,
"step": 1070
},
{
"epoch": 0.27,
"learning_rate": 4.549283909014322e-05,
"loss": 0.3362,
"step": 1080
},
{
"epoch": 0.28,
"learning_rate": 4.591406908171862e-05,
"loss": 0.2078,
"step": 1090
},
{
"epoch": 0.28,
"learning_rate": 4.633529907329402e-05,
"loss": 0.3169,
"step": 1100
},
{
"epoch": 0.28,
"learning_rate": 4.675652906486942e-05,
"loss": 0.2368,
"step": 1110
},
{
"epoch": 0.28,
"learning_rate": 4.717775905644482e-05,
"loss": 0.3596,
"step": 1120
},
{
"epoch": 0.29,
"learning_rate": 4.759898904802022e-05,
"loss": 0.1602,
"step": 1130
},
{
"epoch": 0.29,
"learning_rate": 4.802021903959562e-05,
"loss": 0.3137,
"step": 1140
},
{
"epoch": 0.29,
"learning_rate": 4.844144903117102e-05,
"loss": 0.309,
"step": 1150
},
{
"epoch": 0.29,
"learning_rate": 4.8862679022746424e-05,
"loss": 0.1853,
"step": 1160
},
{
"epoch": 0.3,
"learning_rate": 4.9283909014321825e-05,
"loss": 0.2761,
"step": 1170
},
{
"epoch": 0.3,
"learning_rate": 4.970513900589722e-05,
"loss": 0.2711,
"step": 1180
},
{
"epoch": 0.3,
"learning_rate": 4.998594847775176e-05,
"loss": 0.2759,
"step": 1190
},
{
"epoch": 0.3,
"learning_rate": 4.9939110070257614e-05,
"loss": 0.3978,
"step": 1200
},
{
"epoch": 0.31,
"learning_rate": 4.989227166276347e-05,
"loss": 0.1959,
"step": 1210
},
{
"epoch": 0.31,
"learning_rate": 4.984543325526932e-05,
"loss": 0.2756,
"step": 1220
},
{
"epoch": 0.31,
"learning_rate": 4.979859484777518e-05,
"loss": 0.2248,
"step": 1230
},
{
"epoch": 0.31,
"learning_rate": 4.975175644028103e-05,
"loss": 0.2551,
"step": 1240
},
{
"epoch": 0.32,
"learning_rate": 4.970491803278689e-05,
"loss": 0.2153,
"step": 1250
},
{
"epoch": 0.32,
"learning_rate": 4.965807962529274e-05,
"loss": 0.1876,
"step": 1260
},
{
"epoch": 0.32,
"learning_rate": 4.96112412177986e-05,
"loss": 0.245,
"step": 1270
},
{
"epoch": 0.32,
"learning_rate": 4.956440281030445e-05,
"loss": 0.3696,
"step": 1280
},
{
"epoch": 0.33,
"learning_rate": 4.951756440281031e-05,
"loss": 0.4584,
"step": 1290
},
{
"epoch": 0.33,
"learning_rate": 4.9470725995316164e-05,
"loss": 0.3738,
"step": 1300
},
{
"epoch": 0.33,
"learning_rate": 4.942388758782202e-05,
"loss": 0.3187,
"step": 1310
},
{
"epoch": 0.33,
"learning_rate": 4.937704918032787e-05,
"loss": 0.3516,
"step": 1320
},
{
"epoch": 0.34,
"learning_rate": 4.9330210772833727e-05,
"loss": 0.2401,
"step": 1330
},
{
"epoch": 0.34,
"learning_rate": 4.928337236533958e-05,
"loss": 0.2238,
"step": 1340
},
{
"epoch": 0.34,
"learning_rate": 4.9236533957845435e-05,
"loss": 0.137,
"step": 1350
},
{
"epoch": 0.34,
"learning_rate": 4.918969555035129e-05,
"loss": 0.2181,
"step": 1360
},
{
"epoch": 0.35,
"learning_rate": 4.9142857142857144e-05,
"loss": 0.2391,
"step": 1370
},
{
"epoch": 0.35,
"learning_rate": 4.9096018735363e-05,
"loss": 0.2805,
"step": 1380
},
{
"epoch": 0.35,
"learning_rate": 4.904918032786885e-05,
"loss": 0.1143,
"step": 1390
},
{
"epoch": 0.35,
"learning_rate": 4.9002341920374713e-05,
"loss": 0.3073,
"step": 1400
},
{
"epoch": 0.36,
"learning_rate": 4.895550351288056e-05,
"loss": 0.2037,
"step": 1410
},
{
"epoch": 0.36,
"learning_rate": 4.890866510538642e-05,
"loss": 0.2773,
"step": 1420
},
{
"epoch": 0.36,
"learning_rate": 4.8861826697892276e-05,
"loss": 0.2047,
"step": 1430
},
{
"epoch": 0.36,
"learning_rate": 4.881498829039813e-05,
"loss": 0.2622,
"step": 1440
},
{
"epoch": 0.37,
"learning_rate": 4.8768149882903985e-05,
"loss": 0.3069,
"step": 1450
},
{
"epoch": 0.37,
"learning_rate": 4.872131147540984e-05,
"loss": 0.1897,
"step": 1460
},
{
"epoch": 0.37,
"learning_rate": 4.8674473067915694e-05,
"loss": 0.1656,
"step": 1470
},
{
"epoch": 0.37,
"learning_rate": 4.862763466042155e-05,
"loss": 0.1743,
"step": 1480
},
{
"epoch": 0.38,
"learning_rate": 4.85807962529274e-05,
"loss": 0.1976,
"step": 1490
},
{
"epoch": 0.38,
"learning_rate": 4.853395784543326e-05,
"loss": 0.2634,
"step": 1500
},
{
"epoch": 0.38,
"learning_rate": 4.848711943793911e-05,
"loss": 0.1889,
"step": 1510
},
{
"epoch": 0.38,
"learning_rate": 4.8440281030444965e-05,
"loss": 0.2137,
"step": 1520
},
{
"epoch": 0.39,
"learning_rate": 4.8393442622950826e-05,
"loss": 0.2689,
"step": 1530
},
{
"epoch": 0.39,
"learning_rate": 4.8346604215456674e-05,
"loss": 0.1931,
"step": 1540
},
{
"epoch": 0.39,
"learning_rate": 4.8299765807962535e-05,
"loss": 0.2365,
"step": 1550
},
{
"epoch": 0.39,
"learning_rate": 4.825292740046838e-05,
"loss": 0.2045,
"step": 1560
},
{
"epoch": 0.4,
"learning_rate": 4.8206088992974244e-05,
"loss": 0.1551,
"step": 1570
},
{
"epoch": 0.4,
"learning_rate": 4.81592505854801e-05,
"loss": 0.2096,
"step": 1580
},
{
"epoch": 0.4,
"learning_rate": 4.811241217798595e-05,
"loss": 0.164,
"step": 1590
},
{
"epoch": 0.4,
"learning_rate": 4.8065573770491807e-05,
"loss": 0.2657,
"step": 1600
},
{
"epoch": 0.41,
"learning_rate": 4.801873536299766e-05,
"loss": 0.2325,
"step": 1610
},
{
"epoch": 0.41,
"learning_rate": 4.7971896955503515e-05,
"loss": 0.1632,
"step": 1620
},
{
"epoch": 0.41,
"learning_rate": 4.792505854800937e-05,
"loss": 0.317,
"step": 1630
},
{
"epoch": 0.41,
"learning_rate": 4.7878220140515224e-05,
"loss": 0.256,
"step": 1640
},
{
"epoch": 0.42,
"learning_rate": 4.783138173302108e-05,
"loss": 0.2949,
"step": 1650
},
{
"epoch": 0.42,
"learning_rate": 4.778454332552694e-05,
"loss": 0.161,
"step": 1660
},
{
"epoch": 0.42,
"learning_rate": 4.773770491803279e-05,
"loss": 0.1259,
"step": 1670
},
{
"epoch": 0.42,
"learning_rate": 4.769086651053865e-05,
"loss": 0.2078,
"step": 1680
},
{
"epoch": 0.43,
"learning_rate": 4.7644028103044495e-05,
"loss": 0.1698,
"step": 1690
},
{
"epoch": 0.43,
"learning_rate": 4.7597189695550356e-05,
"loss": 0.2128,
"step": 1700
},
{
"epoch": 0.43,
"learning_rate": 4.755035128805621e-05,
"loss": 0.1408,
"step": 1710
},
{
"epoch": 0.43,
"learning_rate": 4.7503512880562065e-05,
"loss": 0.1762,
"step": 1720
},
{
"epoch": 0.44,
"learning_rate": 4.745667447306792e-05,
"loss": 0.1797,
"step": 1730
},
{
"epoch": 0.44,
"learning_rate": 4.7409836065573774e-05,
"loss": 0.1973,
"step": 1740
},
{
"epoch": 0.44,
"learning_rate": 4.736299765807963e-05,
"loss": 0.1881,
"step": 1750
},
{
"epoch": 0.45,
"learning_rate": 4.731615925058548e-05,
"loss": 0.0622,
"step": 1760
},
{
"epoch": 0.45,
"learning_rate": 4.726932084309134e-05,
"loss": 0.1888,
"step": 1770
},
{
"epoch": 0.45,
"learning_rate": 4.722248243559719e-05,
"loss": 0.1894,
"step": 1780
},
{
"epoch": 0.45,
"learning_rate": 4.7175644028103045e-05,
"loss": 0.1152,
"step": 1790
},
{
"epoch": 0.46,
"learning_rate": 4.71288056206089e-05,
"loss": 0.1781,
"step": 1800
},
{
"epoch": 0.46,
"learning_rate": 4.708196721311476e-05,
"loss": 0.1587,
"step": 1810
},
{
"epoch": 0.46,
"learning_rate": 4.703512880562061e-05,
"loss": 0.1364,
"step": 1820
},
{
"epoch": 0.46,
"learning_rate": 4.698829039812647e-05,
"loss": 0.0981,
"step": 1830
},
{
"epoch": 0.47,
"learning_rate": 4.694145199063232e-05,
"loss": 0.092,
"step": 1840
},
{
"epoch": 0.47,
"learning_rate": 4.689461358313818e-05,
"loss": 0.1293,
"step": 1850
},
{
"epoch": 0.47,
"learning_rate": 4.684777517564403e-05,
"loss": 0.1311,
"step": 1860
},
{
"epoch": 0.47,
"learning_rate": 4.6800936768149887e-05,
"loss": 0.1491,
"step": 1870
},
{
"epoch": 0.48,
"learning_rate": 4.675409836065574e-05,
"loss": 0.1186,
"step": 1880
},
{
"epoch": 0.48,
"learning_rate": 4.6707259953161595e-05,
"loss": 0.1392,
"step": 1890
},
{
"epoch": 0.48,
"learning_rate": 4.666042154566745e-05,
"loss": 0.1226,
"step": 1900
},
{
"epoch": 0.48,
"learning_rate": 4.6613583138173304e-05,
"loss": 0.1861,
"step": 1910
},
{
"epoch": 0.49,
"learning_rate": 4.656674473067916e-05,
"loss": 0.2205,
"step": 1920
},
{
"epoch": 0.49,
"learning_rate": 4.651990632318501e-05,
"loss": 0.1569,
"step": 1930
},
{
"epoch": 0.49,
"learning_rate": 4.6473067915690874e-05,
"loss": 0.2244,
"step": 1940
},
{
"epoch": 0.49,
"learning_rate": 4.642622950819672e-05,
"loss": 0.1678,
"step": 1950
},
{
"epoch": 0.5,
"learning_rate": 4.637939110070258e-05,
"loss": 0.1685,
"step": 1960
},
{
"epoch": 0.5,
"learning_rate": 4.633255269320843e-05,
"loss": 0.176,
"step": 1970
},
{
"epoch": 0.5,
"learning_rate": 4.628571428571429e-05,
"loss": 0.1299,
"step": 1980
},
{
"epoch": 0.5,
"learning_rate": 4.6238875878220145e-05,
"loss": 0.2547,
"step": 1990
},
{
"epoch": 0.51,
"learning_rate": 4.6192037470726e-05,
"loss": 0.2223,
"step": 2000
},
{
"epoch": 0.51,
"learning_rate": 4.6145199063231854e-05,
"loss": 0.1414,
"step": 2010
},
{
"epoch": 0.51,
"learning_rate": 4.60983606557377e-05,
"loss": 0.098,
"step": 2020
},
{
"epoch": 0.51,
"learning_rate": 4.605152224824356e-05,
"loss": 0.1897,
"step": 2030
},
{
"epoch": 0.52,
"learning_rate": 4.600468384074942e-05,
"loss": 0.1669,
"step": 2040
},
{
"epoch": 0.52,
"learning_rate": 4.595784543325527e-05,
"loss": 0.1806,
"step": 2050
},
{
"epoch": 0.52,
"learning_rate": 4.5911007025761125e-05,
"loss": 0.1374,
"step": 2060
},
{
"epoch": 0.52,
"learning_rate": 4.5864168618266986e-05,
"loss": 0.1693,
"step": 2070
},
{
"epoch": 0.53,
"learning_rate": 4.5817330210772834e-05,
"loss": 0.2664,
"step": 2080
},
{
"epoch": 0.53,
"learning_rate": 4.5770491803278695e-05,
"loss": 0.1536,
"step": 2090
},
{
"epoch": 0.53,
"learning_rate": 4.572365339578454e-05,
"loss": 0.3288,
"step": 2100
},
{
"epoch": 0.53,
"learning_rate": 4.5676814988290404e-05,
"loss": 0.2277,
"step": 2110
},
{
"epoch": 0.54,
"learning_rate": 4.562997658079626e-05,
"loss": 0.2154,
"step": 2120
},
{
"epoch": 0.54,
"learning_rate": 4.558313817330211e-05,
"loss": 0.1527,
"step": 2130
},
{
"epoch": 0.54,
"learning_rate": 4.5536299765807967e-05,
"loss": 0.0752,
"step": 2140
},
{
"epoch": 0.54,
"learning_rate": 4.5489461358313814e-05,
"loss": 0.1837,
"step": 2150
},
{
"epoch": 0.55,
"learning_rate": 4.5442622950819675e-05,
"loss": 0.1084,
"step": 2160
},
{
"epoch": 0.55,
"learning_rate": 4.539578454332553e-05,
"loss": 0.1038,
"step": 2170
},
{
"epoch": 0.55,
"learning_rate": 4.5348946135831384e-05,
"loss": 0.2331,
"step": 2180
},
{
"epoch": 0.55,
"learning_rate": 4.530210772833724e-05,
"loss": 0.1011,
"step": 2190
},
{
"epoch": 0.56,
"learning_rate": 4.525526932084309e-05,
"loss": 0.1496,
"step": 2200
},
{
"epoch": 0.56,
"learning_rate": 4.520843091334895e-05,
"loss": 0.087,
"step": 2210
},
{
"epoch": 0.56,
"learning_rate": 4.516159250585481e-05,
"loss": 0.176,
"step": 2220
},
{
"epoch": 0.56,
"learning_rate": 4.5114754098360655e-05,
"loss": 0.1596,
"step": 2230
},
{
"epoch": 0.57,
"learning_rate": 4.5067915690866516e-05,
"loss": 0.0907,
"step": 2240
},
{
"epoch": 0.57,
"learning_rate": 4.5021077283372364e-05,
"loss": 0.1813,
"step": 2250
},
{
"epoch": 0.57,
"learning_rate": 4.4974238875878225e-05,
"loss": 0.1603,
"step": 2260
},
{
"epoch": 0.57,
"learning_rate": 4.492740046838408e-05,
"loss": 0.1851,
"step": 2270
},
{
"epoch": 0.58,
"learning_rate": 4.488056206088993e-05,
"loss": 0.1754,
"step": 2280
},
{
"epoch": 0.58,
"learning_rate": 4.483372365339579e-05,
"loss": 0.0963,
"step": 2290
},
{
"epoch": 0.58,
"learning_rate": 4.478688524590164e-05,
"loss": 0.116,
"step": 2300
},
{
"epoch": 0.58,
"learning_rate": 4.47400468384075e-05,
"loss": 0.183,
"step": 2310
},
{
"epoch": 0.59,
"learning_rate": 4.469320843091335e-05,
"loss": 0.1007,
"step": 2320
},
{
"epoch": 0.59,
"learning_rate": 4.4646370023419205e-05,
"loss": 0.1508,
"step": 2330
},
{
"epoch": 0.59,
"learning_rate": 4.459953161592506e-05,
"loss": 0.1712,
"step": 2340
},
{
"epoch": 0.59,
"learning_rate": 4.455269320843092e-05,
"loss": 0.2128,
"step": 2350
},
{
"epoch": 0.6,
"learning_rate": 4.450585480093677e-05,
"loss": 0.2052,
"step": 2360
},
{
"epoch": 0.6,
"learning_rate": 4.445901639344263e-05,
"loss": 0.2082,
"step": 2370
},
{
"epoch": 0.6,
"learning_rate": 4.441217798594848e-05,
"loss": 0.2069,
"step": 2380
},
{
"epoch": 0.6,
"learning_rate": 4.436533957845434e-05,
"loss": 0.1737,
"step": 2390
},
{
"epoch": 0.61,
"learning_rate": 4.431850117096019e-05,
"loss": 0.1723,
"step": 2400
},
{
"epoch": 0.61,
"learning_rate": 4.427166276346604e-05,
"loss": 0.1517,
"step": 2410
},
{
"epoch": 0.61,
"learning_rate": 4.42248243559719e-05,
"loss": 0.1935,
"step": 2420
},
{
"epoch": 0.61,
"learning_rate": 4.417798594847775e-05,
"loss": 0.1481,
"step": 2430
},
{
"epoch": 0.62,
"learning_rate": 4.413114754098361e-05,
"loss": 0.1232,
"step": 2440
},
{
"epoch": 0.62,
"learning_rate": 4.4084309133489464e-05,
"loss": 0.0566,
"step": 2450
},
{
"epoch": 0.62,
"learning_rate": 4.403747072599532e-05,
"loss": 0.1105,
"step": 2460
},
{
"epoch": 0.62,
"learning_rate": 4.399063231850117e-05,
"loss": 0.1898,
"step": 2470
},
{
"epoch": 0.63,
"learning_rate": 4.394379391100703e-05,
"loss": 0.1563,
"step": 2480
},
{
"epoch": 0.63,
"learning_rate": 4.389695550351288e-05,
"loss": 0.1718,
"step": 2490
},
{
"epoch": 0.63,
"learning_rate": 4.385011709601874e-05,
"loss": 0.1567,
"step": 2500
},
{
"epoch": 0.63,
"learning_rate": 4.380327868852459e-05,
"loss": 0.1329,
"step": 2510
},
{
"epoch": 0.64,
"learning_rate": 4.375644028103045e-05,
"loss": 0.3072,
"step": 2520
},
{
"epoch": 0.64,
"learning_rate": 4.3709601873536305e-05,
"loss": 0.0935,
"step": 2530
},
{
"epoch": 0.64,
"learning_rate": 4.366276346604215e-05,
"loss": 0.2451,
"step": 2540
},
{
"epoch": 0.64,
"learning_rate": 4.3615925058548014e-05,
"loss": 0.1152,
"step": 2550
},
{
"epoch": 0.65,
"learning_rate": 4.356908665105386e-05,
"loss": 0.2368,
"step": 2560
},
{
"epoch": 0.65,
"learning_rate": 4.352224824355972e-05,
"loss": 0.1503,
"step": 2570
},
{
"epoch": 0.65,
"learning_rate": 4.347540983606558e-05,
"loss": 0.1519,
"step": 2580
},
{
"epoch": 0.65,
"learning_rate": 4.342857142857143e-05,
"loss": 0.0844,
"step": 2590
},
{
"epoch": 0.66,
"learning_rate": 4.3381733021077285e-05,
"loss": 0.1742,
"step": 2600
},
{
"epoch": 0.66,
"learning_rate": 4.333489461358314e-05,
"loss": 0.1934,
"step": 2610
},
{
"epoch": 0.66,
"learning_rate": 4.3288056206088994e-05,
"loss": 0.1362,
"step": 2620
},
{
"epoch": 0.67,
"learning_rate": 4.3241217798594855e-05,
"loss": 0.1639,
"step": 2630
},
{
"epoch": 0.67,
"learning_rate": 4.31943793911007e-05,
"loss": 0.1136,
"step": 2640
},
{
"epoch": 0.67,
"learning_rate": 4.3147540983606564e-05,
"loss": 0.1682,
"step": 2650
},
{
"epoch": 0.67,
"learning_rate": 4.310070257611241e-05,
"loss": 0.1349,
"step": 2660
},
{
"epoch": 0.68,
"learning_rate": 4.3053864168618265e-05,
"loss": 0.1085,
"step": 2670
},
{
"epoch": 0.68,
"learning_rate": 4.3007025761124127e-05,
"loss": 0.145,
"step": 2680
},
{
"epoch": 0.68,
"learning_rate": 4.2960187353629974e-05,
"loss": 0.1408,
"step": 2690
},
{
"epoch": 0.68,
"learning_rate": 4.2913348946135835e-05,
"loss": 0.215,
"step": 2700
},
{
"epoch": 0.69,
"learning_rate": 4.286651053864169e-05,
"loss": 0.0694,
"step": 2710
},
{
"epoch": 0.69,
"learning_rate": 4.2819672131147544e-05,
"loss": 0.1463,
"step": 2720
},
{
"epoch": 0.69,
"learning_rate": 4.27728337236534e-05,
"loss": 0.1847,
"step": 2730
},
{
"epoch": 0.69,
"learning_rate": 4.272599531615925e-05,
"loss": 0.1274,
"step": 2740
},
{
"epoch": 0.7,
"learning_rate": 4.267915690866511e-05,
"loss": 0.1468,
"step": 2750
},
{
"epoch": 0.7,
"learning_rate": 4.263231850117097e-05,
"loss": 0.2387,
"step": 2760
},
{
"epoch": 0.7,
"learning_rate": 4.2585480093676815e-05,
"loss": 0.1984,
"step": 2770
},
{
"epoch": 0.7,
"learning_rate": 4.2538641686182676e-05,
"loss": 0.1204,
"step": 2780
},
{
"epoch": 0.71,
"learning_rate": 4.2491803278688524e-05,
"loss": 0.2395,
"step": 2790
},
{
"epoch": 0.71,
"learning_rate": 4.244496487119438e-05,
"loss": 0.1443,
"step": 2800
},
{
"epoch": 0.71,
"learning_rate": 4.239812646370024e-05,
"loss": 0.1754,
"step": 2810
},
{
"epoch": 0.71,
"learning_rate": 4.235128805620609e-05,
"loss": 0.0441,
"step": 2820
},
{
"epoch": 0.72,
"learning_rate": 4.230444964871195e-05,
"loss": 0.1436,
"step": 2830
},
{
"epoch": 0.72,
"learning_rate": 4.2257611241217796e-05,
"loss": 0.1218,
"step": 2840
},
{
"epoch": 0.72,
"learning_rate": 4.221077283372366e-05,
"loss": 0.1511,
"step": 2850
},
{
"epoch": 0.72,
"learning_rate": 4.216393442622951e-05,
"loss": 0.1221,
"step": 2860
},
{
"epoch": 0.73,
"learning_rate": 4.2117096018735365e-05,
"loss": 0.0819,
"step": 2870
},
{
"epoch": 0.73,
"learning_rate": 4.207025761124122e-05,
"loss": 0.1189,
"step": 2880
},
{
"epoch": 0.73,
"learning_rate": 4.2023419203747074e-05,
"loss": 0.0878,
"step": 2890
},
{
"epoch": 0.73,
"learning_rate": 4.197658079625293e-05,
"loss": 0.1314,
"step": 2900
},
{
"epoch": 0.74,
"learning_rate": 4.192974238875878e-05,
"loss": 0.093,
"step": 2910
},
{
"epoch": 0.74,
"learning_rate": 4.188290398126464e-05,
"loss": 0.0758,
"step": 2920
},
{
"epoch": 0.74,
"learning_rate": 4.183606557377049e-05,
"loss": 0.1141,
"step": 2930
},
{
"epoch": 0.74,
"learning_rate": 4.178922716627635e-05,
"loss": 0.0519,
"step": 2940
},
{
"epoch": 0.75,
"learning_rate": 4.17423887587822e-05,
"loss": 0.1677,
"step": 2950
},
{
"epoch": 0.75,
"learning_rate": 4.169555035128806e-05,
"loss": 0.1292,
"step": 2960
},
{
"epoch": 0.75,
"learning_rate": 4.164871194379391e-05,
"loss": 0.0952,
"step": 2970
},
{
"epoch": 0.75,
"learning_rate": 4.160187353629977e-05,
"loss": 0.1205,
"step": 2980
},
{
"epoch": 0.76,
"learning_rate": 4.1555035128805624e-05,
"loss": 0.3021,
"step": 2990
},
{
"epoch": 0.76,
"learning_rate": 4.150819672131148e-05,
"loss": 0.1267,
"step": 3000
},
{
"epoch": 0.76,
"learning_rate": 4.146135831381733e-05,
"loss": 0.0811,
"step": 3010
},
{
"epoch": 0.76,
"learning_rate": 4.141451990632319e-05,
"loss": 0.1598,
"step": 3020
},
{
"epoch": 0.77,
"learning_rate": 4.136768149882904e-05,
"loss": 0.0767,
"step": 3030
},
{
"epoch": 0.77,
"learning_rate": 4.1320843091334895e-05,
"loss": 0.1229,
"step": 3040
},
{
"epoch": 0.77,
"learning_rate": 4.127400468384075e-05,
"loss": 0.2994,
"step": 3050
},
{
"epoch": 0.77,
"learning_rate": 4.1227166276346604e-05,
"loss": 0.163,
"step": 3060
},
{
"epoch": 0.78,
"learning_rate": 4.118032786885246e-05,
"loss": 0.2827,
"step": 3070
},
{
"epoch": 0.78,
"learning_rate": 4.113348946135831e-05,
"loss": 0.2022,
"step": 3080
},
{
"epoch": 0.78,
"learning_rate": 4.1086651053864174e-05,
"loss": 0.1131,
"step": 3090
},
{
"epoch": 0.78,
"learning_rate": 4.103981264637002e-05,
"loss": 0.2182,
"step": 3100
},
{
"epoch": 0.79,
"learning_rate": 4.099297423887588e-05,
"loss": 0.0998,
"step": 3110
},
{
"epoch": 0.79,
"learning_rate": 4.094613583138173e-05,
"loss": 0.1207,
"step": 3120
},
{
"epoch": 0.79,
"learning_rate": 4.089929742388759e-05,
"loss": 0.0907,
"step": 3130
},
{
"epoch": 0.79,
"learning_rate": 4.0852459016393445e-05,
"loss": 0.1132,
"step": 3140
},
{
"epoch": 0.8,
"learning_rate": 4.08056206088993e-05,
"loss": 0.1051,
"step": 3150
},
{
"epoch": 0.8,
"learning_rate": 4.0758782201405154e-05,
"loss": 0.1342,
"step": 3160
},
{
"epoch": 0.8,
"learning_rate": 4.071194379391101e-05,
"loss": 0.1189,
"step": 3170
},
{
"epoch": 0.8,
"learning_rate": 4.066510538641686e-05,
"loss": 0.1803,
"step": 3180
},
{
"epoch": 0.81,
"learning_rate": 4.061826697892272e-05,
"loss": 0.1404,
"step": 3190
},
{
"epoch": 0.81,
"learning_rate": 4.057142857142857e-05,
"loss": 0.0852,
"step": 3200
},
{
"epoch": 0.81,
"learning_rate": 4.0524590163934425e-05,
"loss": 0.1545,
"step": 3210
},
{
"epoch": 0.81,
"learning_rate": 4.0477751756440287e-05,
"loss": 0.1064,
"step": 3220
},
{
"epoch": 0.82,
"learning_rate": 4.0430913348946134e-05,
"loss": 0.2163,
"step": 3230
},
{
"epoch": 0.82,
"learning_rate": 4.0384074941451995e-05,
"loss": 0.1679,
"step": 3240
},
{
"epoch": 0.82,
"learning_rate": 4.033723653395784e-05,
"loss": 0.0801,
"step": 3250
},
{
"epoch": 0.82,
"learning_rate": 4.0290398126463704e-05,
"loss": 0.1115,
"step": 3260
},
{
"epoch": 0.83,
"learning_rate": 4.024355971896956e-05,
"loss": 0.0532,
"step": 3270
},
{
"epoch": 0.83,
"learning_rate": 4.019672131147541e-05,
"loss": 0.1702,
"step": 3280
},
{
"epoch": 0.83,
"learning_rate": 4.014988290398127e-05,
"loss": 0.0824,
"step": 3290
},
{
"epoch": 0.83,
"learning_rate": 4.010304449648712e-05,
"loss": 0.1437,
"step": 3300
},
{
"epoch": 0.84,
"learning_rate": 4.0056206088992975e-05,
"loss": 0.1871,
"step": 3310
},
{
"epoch": 0.84,
"learning_rate": 4.000936768149883e-05,
"loss": 0.1321,
"step": 3320
},
{
"epoch": 0.84,
"learning_rate": 3.9962529274004684e-05,
"loss": 0.0973,
"step": 3330
},
{
"epoch": 0.84,
"learning_rate": 3.991569086651054e-05,
"loss": 0.0969,
"step": 3340
},
{
"epoch": 0.85,
"learning_rate": 3.98688524590164e-05,
"loss": 0.0951,
"step": 3350
},
{
"epoch": 0.85,
"learning_rate": 3.982201405152225e-05,
"loss": 0.1461,
"step": 3360
},
{
"epoch": 0.85,
"learning_rate": 3.977517564402811e-05,
"loss": 0.1341,
"step": 3370
},
{
"epoch": 0.85,
"learning_rate": 3.9728337236533956e-05,
"loss": 0.1375,
"step": 3380
},
{
"epoch": 0.86,
"learning_rate": 3.968149882903982e-05,
"loss": 0.1654,
"step": 3390
},
{
"epoch": 0.86,
"learning_rate": 3.963466042154567e-05,
"loss": 0.0967,
"step": 3400
},
{
"epoch": 0.86,
"learning_rate": 3.9587822014051525e-05,
"loss": 0.2045,
"step": 3410
},
{
"epoch": 0.86,
"learning_rate": 3.954098360655738e-05,
"loss": 0.1389,
"step": 3420
},
{
"epoch": 0.87,
"learning_rate": 3.9494145199063234e-05,
"loss": 0.0969,
"step": 3430
},
{
"epoch": 0.87,
"learning_rate": 3.944730679156909e-05,
"loss": 0.1687,
"step": 3440
},
{
"epoch": 0.87,
"learning_rate": 3.940046838407494e-05,
"loss": 0.0629,
"step": 3450
},
{
"epoch": 0.88,
"learning_rate": 3.93536299765808e-05,
"loss": 0.2238,
"step": 3460
},
{
"epoch": 0.88,
"learning_rate": 3.930679156908665e-05,
"loss": 0.1398,
"step": 3470
},
{
"epoch": 0.88,
"learning_rate": 3.9259953161592505e-05,
"loss": 0.1131,
"step": 3480
},
{
"epoch": 0.88,
"learning_rate": 3.921311475409836e-05,
"loss": 0.2312,
"step": 3490
},
{
"epoch": 0.89,
"learning_rate": 3.916627634660422e-05,
"loss": 0.0256,
"step": 3500
},
{
"epoch": 0.89,
"learning_rate": 3.911943793911007e-05,
"loss": 0.0823,
"step": 3510
},
{
"epoch": 0.89,
"learning_rate": 3.907259953161593e-05,
"loss": 0.0673,
"step": 3520
},
{
"epoch": 0.89,
"learning_rate": 3.902576112412178e-05,
"loss": 0.1069,
"step": 3530
},
{
"epoch": 0.9,
"learning_rate": 3.897892271662764e-05,
"loss": 0.1959,
"step": 3540
},
{
"epoch": 0.9,
"learning_rate": 3.893208430913349e-05,
"loss": 0.1046,
"step": 3550
},
{
"epoch": 0.9,
"learning_rate": 3.888524590163935e-05,
"loss": 0.1108,
"step": 3560
},
{
"epoch": 0.9,
"learning_rate": 3.88384074941452e-05,
"loss": 0.1144,
"step": 3570
},
{
"epoch": 0.91,
"learning_rate": 3.8791569086651055e-05,
"loss": 0.0919,
"step": 3580
},
{
"epoch": 0.91,
"learning_rate": 3.874473067915691e-05,
"loss": 0.1189,
"step": 3590
},
{
"epoch": 0.91,
"learning_rate": 3.8697892271662764e-05,
"loss": 0.0686,
"step": 3600
},
{
"epoch": 0.91,
"learning_rate": 3.865105386416862e-05,
"loss": 0.0459,
"step": 3610
},
{
"epoch": 0.92,
"learning_rate": 3.860421545667447e-05,
"loss": 0.1078,
"step": 3620
},
{
"epoch": 0.92,
"learning_rate": 3.8557377049180334e-05,
"loss": 0.1647,
"step": 3630
},
{
"epoch": 0.92,
"learning_rate": 3.851053864168618e-05,
"loss": 0.1086,
"step": 3640
},
{
"epoch": 0.92,
"learning_rate": 3.846370023419204e-05,
"loss": 0.1955,
"step": 3650
},
{
"epoch": 0.93,
"learning_rate": 3.841686182669789e-05,
"loss": 0.0901,
"step": 3660
},
{
"epoch": 0.93,
"learning_rate": 3.837002341920375e-05,
"loss": 0.045,
"step": 3670
},
{
"epoch": 0.93,
"learning_rate": 3.8323185011709605e-05,
"loss": 0.1867,
"step": 3680
},
{
"epoch": 0.93,
"learning_rate": 3.827634660421546e-05,
"loss": 0.1367,
"step": 3690
},
{
"epoch": 0.94,
"learning_rate": 3.8229508196721314e-05,
"loss": 0.192,
"step": 3700
},
{
"epoch": 0.94,
"learning_rate": 3.818266978922717e-05,
"loss": 0.1667,
"step": 3710
},
{
"epoch": 0.94,
"learning_rate": 3.813583138173302e-05,
"loss": 0.1011,
"step": 3720
},
{
"epoch": 0.94,
"learning_rate": 3.808899297423888e-05,
"loss": 0.0572,
"step": 3730
},
{
"epoch": 0.95,
"learning_rate": 3.804215456674473e-05,
"loss": 0.1221,
"step": 3740
},
{
"epoch": 0.95,
"learning_rate": 3.7995316159250585e-05,
"loss": 0.0624,
"step": 3750
},
{
"epoch": 0.95,
"learning_rate": 3.794847775175644e-05,
"loss": 0.0972,
"step": 3760
},
{
"epoch": 0.95,
"learning_rate": 3.7901639344262294e-05,
"loss": 0.1573,
"step": 3770
},
{
"epoch": 0.96,
"learning_rate": 3.7854800936768155e-05,
"loss": 0.1313,
"step": 3780
},
{
"epoch": 0.96,
"learning_rate": 3.7807962529274e-05,
"loss": 0.0879,
"step": 3790
},
{
"epoch": 0.96,
"learning_rate": 3.7761124121779864e-05,
"loss": 0.0838,
"step": 3800
},
{
"epoch": 0.96,
"learning_rate": 3.771428571428572e-05,
"loss": 0.1345,
"step": 3810
},
{
"epoch": 0.97,
"learning_rate": 3.766744730679157e-05,
"loss": 0.1336,
"step": 3820
},
{
"epoch": 0.97,
"learning_rate": 3.762060889929743e-05,
"loss": 0.1363,
"step": 3830
},
{
"epoch": 0.97,
"learning_rate": 3.757377049180328e-05,
"loss": 0.1429,
"step": 3840
},
{
"epoch": 0.97,
"learning_rate": 3.7526932084309135e-05,
"loss": 0.2423,
"step": 3850
},
{
"epoch": 0.98,
"learning_rate": 3.748009367681499e-05,
"loss": 0.0836,
"step": 3860
},
{
"epoch": 0.98,
"learning_rate": 3.7433255269320844e-05,
"loss": 0.1646,
"step": 3870
},
{
"epoch": 0.98,
"learning_rate": 3.73864168618267e-05,
"loss": 0.1477,
"step": 3880
},
{
"epoch": 0.98,
"learning_rate": 3.733957845433255e-05,
"loss": 0.0211,
"step": 3890
},
{
"epoch": 0.99,
"learning_rate": 3.729274004683841e-05,
"loss": 0.0819,
"step": 3900
},
{
"epoch": 0.99,
"learning_rate": 3.724590163934427e-05,
"loss": 0.1287,
"step": 3910
},
{
"epoch": 0.99,
"learning_rate": 3.7199063231850116e-05,
"loss": 0.0839,
"step": 3920
},
{
"epoch": 0.99,
"learning_rate": 3.715222482435598e-05,
"loss": 0.1479,
"step": 3930
},
{
"epoch": 1.0,
"learning_rate": 3.7105386416861824e-05,
"loss": 0.2216,
"step": 3940
},
{
"epoch": 1.0,
"learning_rate": 3.7058548009367685e-05,
"loss": 0.0643,
"step": 3950
},
{
"epoch": 1.0,
"eval_accuracy": 0.9933143669985776,
"eval_loss": 0.021845383569598198,
"eval_runtime": 85.6521,
"eval_samples_per_second": 82.076,
"eval_steps_per_second": 20.525,
"step": 3954
},
{
"epoch": 1.0,
"learning_rate": 3.701170960187354e-05,
"loss": 0.2868,
"step": 3960
},
{
"epoch": 1.0,
"learning_rate": 3.6964871194379394e-05,
"loss": 0.0618,
"step": 3970
},
{
"epoch": 1.01,
"learning_rate": 3.691803278688525e-05,
"loss": 0.0459,
"step": 3980
},
{
"epoch": 1.01,
"learning_rate": 3.68711943793911e-05,
"loss": 0.191,
"step": 3990
},
{
"epoch": 1.01,
"learning_rate": 3.682435597189696e-05,
"loss": 0.1108,
"step": 4000
},
{
"epoch": 1.01,
"learning_rate": 3.677751756440281e-05,
"loss": 0.0945,
"step": 4010
},
{
"epoch": 1.02,
"learning_rate": 3.6730679156908665e-05,
"loss": 0.1185,
"step": 4020
},
{
"epoch": 1.02,
"learning_rate": 3.668384074941452e-05,
"loss": 0.1311,
"step": 4030
},
{
"epoch": 1.02,
"learning_rate": 3.663700234192038e-05,
"loss": 0.1305,
"step": 4040
},
{
"epoch": 1.02,
"learning_rate": 3.659016393442623e-05,
"loss": 0.1185,
"step": 4050
},
{
"epoch": 1.03,
"learning_rate": 3.654332552693209e-05,
"loss": 0.0986,
"step": 4060
},
{
"epoch": 1.03,
"learning_rate": 3.649648711943794e-05,
"loss": 0.1849,
"step": 4070
},
{
"epoch": 1.03,
"learning_rate": 3.64496487119438e-05,
"loss": 0.1761,
"step": 4080
},
{
"epoch": 1.03,
"learning_rate": 3.640281030444965e-05,
"loss": 0.0628,
"step": 4090
},
{
"epoch": 1.04,
"learning_rate": 3.635597189695551e-05,
"loss": 0.1002,
"step": 4100
},
{
"epoch": 1.04,
"learning_rate": 3.630913348946136e-05,
"loss": 0.1463,
"step": 4110
},
{
"epoch": 1.04,
"learning_rate": 3.6262295081967215e-05,
"loss": 0.0106,
"step": 4120
},
{
"epoch": 1.04,
"learning_rate": 3.621545667447307e-05,
"loss": 0.0386,
"step": 4130
},
{
"epoch": 1.05,
"learning_rate": 3.6168618266978924e-05,
"loss": 0.0981,
"step": 4140
},
{
"epoch": 1.05,
"learning_rate": 3.612177985948478e-05,
"loss": 0.0836,
"step": 4150
},
{
"epoch": 1.05,
"learning_rate": 3.607494145199063e-05,
"loss": 0.1302,
"step": 4160
},
{
"epoch": 1.05,
"learning_rate": 3.602810304449649e-05,
"loss": 0.2256,
"step": 4170
},
{
"epoch": 1.06,
"learning_rate": 3.598126463700234e-05,
"loss": 0.071,
"step": 4180
},
{
"epoch": 1.06,
"learning_rate": 3.59344262295082e-05,
"loss": 0.1185,
"step": 4190
},
{
"epoch": 1.06,
"learning_rate": 3.588758782201405e-05,
"loss": 0.0575,
"step": 4200
},
{
"epoch": 1.06,
"learning_rate": 3.584074941451991e-05,
"loss": 0.1338,
"step": 4210
},
{
"epoch": 1.07,
"learning_rate": 3.5793911007025765e-05,
"loss": 0.2642,
"step": 4220
},
{
"epoch": 1.07,
"learning_rate": 3.574707259953162e-05,
"loss": 0.0502,
"step": 4230
},
{
"epoch": 1.07,
"learning_rate": 3.5700234192037474e-05,
"loss": 0.1999,
"step": 4240
},
{
"epoch": 1.07,
"learning_rate": 3.565339578454333e-05,
"loss": 0.1458,
"step": 4250
},
{
"epoch": 1.08,
"learning_rate": 3.560655737704918e-05,
"loss": 0.0774,
"step": 4260
},
{
"epoch": 1.08,
"learning_rate": 3.555971896955504e-05,
"loss": 0.0657,
"step": 4270
},
{
"epoch": 1.08,
"learning_rate": 3.551288056206089e-05,
"loss": 0.2004,
"step": 4280
},
{
"epoch": 1.08,
"learning_rate": 3.5466042154566746e-05,
"loss": 0.154,
"step": 4290
},
{
"epoch": 1.09,
"learning_rate": 3.54192037470726e-05,
"loss": 0.1239,
"step": 4300
},
{
"epoch": 1.09,
"learning_rate": 3.5372365339578454e-05,
"loss": 0.0925,
"step": 4310
},
{
"epoch": 1.09,
"learning_rate": 3.5325526932084315e-05,
"loss": 0.0578,
"step": 4320
},
{
"epoch": 1.1,
"learning_rate": 3.527868852459016e-05,
"loss": 0.1487,
"step": 4330
},
{
"epoch": 1.1,
"learning_rate": 3.5231850117096024e-05,
"loss": 0.0967,
"step": 4340
},
{
"epoch": 1.1,
"learning_rate": 3.518501170960187e-05,
"loss": 0.1212,
"step": 4350
},
{
"epoch": 1.1,
"learning_rate": 3.513817330210773e-05,
"loss": 0.0785,
"step": 4360
},
{
"epoch": 1.11,
"learning_rate": 3.509133489461359e-05,
"loss": 0.0591,
"step": 4370
},
{
"epoch": 1.11,
"learning_rate": 3.504449648711944e-05,
"loss": 0.2875,
"step": 4380
},
{
"epoch": 1.11,
"learning_rate": 3.4997658079625295e-05,
"loss": 0.073,
"step": 4390
},
{
"epoch": 1.11,
"learning_rate": 3.495081967213115e-05,
"loss": 0.0973,
"step": 4400
},
{
"epoch": 1.12,
"learning_rate": 3.4903981264637004e-05,
"loss": 0.129,
"step": 4410
},
{
"epoch": 1.12,
"learning_rate": 3.485714285714286e-05,
"loss": 0.0173,
"step": 4420
},
{
"epoch": 1.12,
"learning_rate": 3.481030444964871e-05,
"loss": 0.1036,
"step": 4430
},
{
"epoch": 1.12,
"learning_rate": 3.476346604215457e-05,
"loss": 0.0317,
"step": 4440
},
{
"epoch": 1.13,
"learning_rate": 3.471662763466043e-05,
"loss": 0.0288,
"step": 4450
},
{
"epoch": 1.13,
"learning_rate": 3.4669789227166276e-05,
"loss": 0.027,
"step": 4460
},
{
"epoch": 1.13,
"learning_rate": 3.462295081967214e-05,
"loss": 0.1465,
"step": 4470
},
{
"epoch": 1.13,
"learning_rate": 3.4576112412177984e-05,
"loss": 0.0985,
"step": 4480
},
{
"epoch": 1.14,
"learning_rate": 3.4529274004683845e-05,
"loss": 0.0408,
"step": 4490
},
{
"epoch": 1.14,
"learning_rate": 3.44824355971897e-05,
"loss": 0.0449,
"step": 4500
},
{
"epoch": 1.14,
"learning_rate": 3.4435597189695554e-05,
"loss": 0.0284,
"step": 4510
},
{
"epoch": 1.14,
"learning_rate": 3.438875878220141e-05,
"loss": 0.0241,
"step": 4520
},
{
"epoch": 1.15,
"learning_rate": 3.434192037470726e-05,
"loss": 0.0843,
"step": 4530
},
{
"epoch": 1.15,
"learning_rate": 3.429508196721312e-05,
"loss": 0.1491,
"step": 4540
},
{
"epoch": 1.15,
"learning_rate": 3.424824355971897e-05,
"loss": 0.2487,
"step": 4550
},
{
"epoch": 1.15,
"learning_rate": 3.4201405152224826e-05,
"loss": 0.0894,
"step": 4560
},
{
"epoch": 1.16,
"learning_rate": 3.415456674473068e-05,
"loss": 0.0521,
"step": 4570
},
{
"epoch": 1.16,
"learning_rate": 3.4107728337236534e-05,
"loss": 0.0859,
"step": 4580
},
{
"epoch": 1.16,
"learning_rate": 3.406088992974239e-05,
"loss": 0.0628,
"step": 4590
},
{
"epoch": 1.16,
"learning_rate": 3.401405152224825e-05,
"loss": 0.063,
"step": 4600
},
{
"epoch": 1.17,
"learning_rate": 3.39672131147541e-05,
"loss": 0.0478,
"step": 4610
},
{
"epoch": 1.17,
"learning_rate": 3.392037470725996e-05,
"loss": 0.0796,
"step": 4620
},
{
"epoch": 1.17,
"learning_rate": 3.387353629976581e-05,
"loss": 0.0631,
"step": 4630
},
{
"epoch": 1.17,
"learning_rate": 3.382669789227167e-05,
"loss": 0.1418,
"step": 4640
},
{
"epoch": 1.18,
"learning_rate": 3.377985948477752e-05,
"loss": 0.1141,
"step": 4650
},
{
"epoch": 1.18,
"learning_rate": 3.3733021077283375e-05,
"loss": 0.0353,
"step": 4660
},
{
"epoch": 1.18,
"learning_rate": 3.368618266978923e-05,
"loss": 0.0416,
"step": 4670
},
{
"epoch": 1.18,
"learning_rate": 3.3639344262295084e-05,
"loss": 0.1117,
"step": 4680
},
{
"epoch": 1.19,
"learning_rate": 3.359250585480094e-05,
"loss": 0.086,
"step": 4690
},
{
"epoch": 1.19,
"learning_rate": 3.354566744730679e-05,
"loss": 0.1197,
"step": 4700
},
{
"epoch": 1.19,
"learning_rate": 3.349882903981265e-05,
"loss": 0.1911,
"step": 4710
},
{
"epoch": 1.19,
"learning_rate": 3.34519906323185e-05,
"loss": 0.1228,
"step": 4720
},
{
"epoch": 1.2,
"learning_rate": 3.340515222482436e-05,
"loss": 0.1137,
"step": 4730
},
{
"epoch": 1.2,
"learning_rate": 3.335831381733021e-05,
"loss": 0.1038,
"step": 4740
},
{
"epoch": 1.2,
"learning_rate": 3.331147540983607e-05,
"loss": 0.1766,
"step": 4750
},
{
"epoch": 1.2,
"learning_rate": 3.326463700234192e-05,
"loss": 0.0841,
"step": 4760
},
{
"epoch": 1.21,
"learning_rate": 3.321779859484778e-05,
"loss": 0.1158,
"step": 4770
},
{
"epoch": 1.21,
"learning_rate": 3.3170960187353634e-05,
"loss": 0.0175,
"step": 4780
},
{
"epoch": 1.21,
"learning_rate": 3.312412177985949e-05,
"loss": 0.0551,
"step": 4790
},
{
"epoch": 1.21,
"learning_rate": 3.307728337236534e-05,
"loss": 0.1673,
"step": 4800
},
{
"epoch": 1.22,
"learning_rate": 3.303044496487119e-05,
"loss": 0.034,
"step": 4810
},
{
"epoch": 1.22,
"learning_rate": 3.298360655737705e-05,
"loss": 0.0841,
"step": 4820
},
{
"epoch": 1.22,
"learning_rate": 3.2936768149882906e-05,
"loss": 0.0603,
"step": 4830
},
{
"epoch": 1.22,
"learning_rate": 3.288992974238876e-05,
"loss": 0.0859,
"step": 4840
},
{
"epoch": 1.23,
"learning_rate": 3.2843091334894614e-05,
"loss": 0.1987,
"step": 4850
},
{
"epoch": 1.23,
"learning_rate": 3.2796252927400475e-05,
"loss": 0.1442,
"step": 4860
},
{
"epoch": 1.23,
"learning_rate": 3.274941451990632e-05,
"loss": 0.0848,
"step": 4870
},
{
"epoch": 1.23,
"learning_rate": 3.2702576112412184e-05,
"loss": 0.0903,
"step": 4880
},
{
"epoch": 1.24,
"learning_rate": 3.265573770491803e-05,
"loss": 0.0417,
"step": 4890
},
{
"epoch": 1.24,
"learning_rate": 3.260889929742389e-05,
"loss": 0.0562,
"step": 4900
},
{
"epoch": 1.24,
"learning_rate": 3.256206088992975e-05,
"loss": 0.0326,
"step": 4910
},
{
"epoch": 1.24,
"learning_rate": 3.25152224824356e-05,
"loss": 0.0828,
"step": 4920
},
{
"epoch": 1.25,
"learning_rate": 3.2468384074941455e-05,
"loss": 0.0603,
"step": 4930
},
{
"epoch": 1.25,
"learning_rate": 3.24215456674473e-05,
"loss": 0.0507,
"step": 4940
},
{
"epoch": 1.25,
"learning_rate": 3.2374707259953164e-05,
"loss": 0.1276,
"step": 4950
},
{
"epoch": 1.25,
"learning_rate": 3.232786885245902e-05,
"loss": 0.1803,
"step": 4960
},
{
"epoch": 1.26,
"learning_rate": 3.228103044496487e-05,
"loss": 0.221,
"step": 4970
},
{
"epoch": 1.26,
"learning_rate": 3.223419203747073e-05,
"loss": 0.1683,
"step": 4980
},
{
"epoch": 1.26,
"learning_rate": 3.218735362997658e-05,
"loss": 0.1163,
"step": 4990
},
{
"epoch": 1.26,
"learning_rate": 3.2140515222482436e-05,
"loss": 0.0774,
"step": 5000
},
{
"epoch": 1.27,
"learning_rate": 3.20936768149883e-05,
"loss": 0.052,
"step": 5010
},
{
"epoch": 1.27,
"learning_rate": 3.2046838407494144e-05,
"loss": 0.0418,
"step": 5020
},
{
"epoch": 1.27,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.1452,
"step": 5030
},
{
"epoch": 1.27,
"learning_rate": 3.195316159250585e-05,
"loss": 0.0745,
"step": 5040
},
{
"epoch": 1.28,
"learning_rate": 3.1906323185011714e-05,
"loss": 0.0295,
"step": 5050
},
{
"epoch": 1.28,
"learning_rate": 3.185948477751757e-05,
"loss": 0.0333,
"step": 5060
},
{
"epoch": 1.28,
"learning_rate": 3.1812646370023416e-05,
"loss": 0.0449,
"step": 5070
},
{
"epoch": 1.28,
"learning_rate": 3.176580796252928e-05,
"loss": 0.295,
"step": 5080
},
{
"epoch": 1.29,
"learning_rate": 3.171896955503513e-05,
"loss": 0.1211,
"step": 5090
},
{
"epoch": 1.29,
"learning_rate": 3.1672131147540986e-05,
"loss": 0.0511,
"step": 5100
},
{
"epoch": 1.29,
"learning_rate": 3.162529274004684e-05,
"loss": 0.1489,
"step": 5110
},
{
"epoch": 1.29,
"learning_rate": 3.1578454332552694e-05,
"loss": 0.0948,
"step": 5120
},
{
"epoch": 1.3,
"learning_rate": 3.153161592505855e-05,
"loss": 0.0997,
"step": 5130
},
{
"epoch": 1.3,
"learning_rate": 3.148477751756441e-05,
"loss": 0.1305,
"step": 5140
},
{
"epoch": 1.3,
"learning_rate": 3.143793911007026e-05,
"loss": 0.072,
"step": 5150
},
{
"epoch": 1.3,
"learning_rate": 3.139110070257612e-05,
"loss": 0.0697,
"step": 5160
},
{
"epoch": 1.31,
"learning_rate": 3.1344262295081966e-05,
"loss": 0.0676,
"step": 5170
},
{
"epoch": 1.31,
"learning_rate": 3.129742388758782e-05,
"loss": 0.1833,
"step": 5180
},
{
"epoch": 1.31,
"learning_rate": 3.125058548009368e-05,
"loss": 0.0593,
"step": 5190
},
{
"epoch": 1.32,
"learning_rate": 3.120374707259953e-05,
"loss": 0.1186,
"step": 5200
},
{
"epoch": 1.32,
"learning_rate": 3.115690866510539e-05,
"loss": 0.1424,
"step": 5210
},
{
"epoch": 1.32,
"learning_rate": 3.111007025761124e-05,
"loss": 0.1525,
"step": 5220
},
{
"epoch": 1.32,
"learning_rate": 3.10632318501171e-05,
"loss": 0.1396,
"step": 5230
},
{
"epoch": 1.33,
"learning_rate": 3.101639344262295e-05,
"loss": 0.0862,
"step": 5240
},
{
"epoch": 1.33,
"learning_rate": 3.096955503512881e-05,
"loss": 0.1312,
"step": 5250
},
{
"epoch": 1.33,
"learning_rate": 3.092271662763466e-05,
"loss": 0.1161,
"step": 5260
},
{
"epoch": 1.33,
"learning_rate": 3.087587822014052e-05,
"loss": 0.0988,
"step": 5270
},
{
"epoch": 1.34,
"learning_rate": 3.082903981264637e-05,
"loss": 0.062,
"step": 5280
},
{
"epoch": 1.34,
"learning_rate": 3.078220140515223e-05,
"loss": 0.0698,
"step": 5290
},
{
"epoch": 1.34,
"learning_rate": 3.073536299765808e-05,
"loss": 0.0168,
"step": 5300
},
{
"epoch": 1.34,
"learning_rate": 3.068852459016393e-05,
"loss": 0.0974,
"step": 5310
},
{
"epoch": 1.35,
"learning_rate": 3.0641686182669794e-05,
"loss": 0.0402,
"step": 5320
},
{
"epoch": 1.35,
"learning_rate": 3.059484777517564e-05,
"loss": 0.0492,
"step": 5330
},
{
"epoch": 1.35,
"learning_rate": 3.05480093676815e-05,
"loss": 0.199,
"step": 5340
},
{
"epoch": 1.35,
"learning_rate": 3.0501170960187354e-05,
"loss": 0.1129,
"step": 5350
},
{
"epoch": 1.36,
"learning_rate": 3.045433255269321e-05,
"loss": 0.0502,
"step": 5360
},
{
"epoch": 1.36,
"learning_rate": 3.0407494145199066e-05,
"loss": 0.2362,
"step": 5370
},
{
"epoch": 1.36,
"learning_rate": 3.036065573770492e-05,
"loss": 0.0949,
"step": 5380
},
{
"epoch": 1.36,
"learning_rate": 3.0313817330210774e-05,
"loss": 0.1662,
"step": 5390
},
{
"epoch": 1.37,
"learning_rate": 3.0266978922716625e-05,
"loss": 0.057,
"step": 5400
},
{
"epoch": 1.37,
"learning_rate": 3.0220140515222483e-05,
"loss": 0.0261,
"step": 5410
},
{
"epoch": 1.37,
"learning_rate": 3.017330210772834e-05,
"loss": 0.0979,
"step": 5420
},
{
"epoch": 1.37,
"learning_rate": 3.012646370023419e-05,
"loss": 0.04,
"step": 5430
},
{
"epoch": 1.38,
"learning_rate": 3.007962529274005e-05,
"loss": 0.0524,
"step": 5440
},
{
"epoch": 1.38,
"learning_rate": 3.00327868852459e-05,
"loss": 0.0511,
"step": 5450
},
{
"epoch": 1.38,
"learning_rate": 2.9985948477751758e-05,
"loss": 0.0257,
"step": 5460
},
{
"epoch": 1.38,
"learning_rate": 2.9939110070257615e-05,
"loss": 0.0306,
"step": 5470
},
{
"epoch": 1.39,
"learning_rate": 2.9892271662763466e-05,
"loss": 0.0335,
"step": 5480
},
{
"epoch": 1.39,
"learning_rate": 2.9845433255269324e-05,
"loss": 0.1739,
"step": 5490
},
{
"epoch": 1.39,
"learning_rate": 2.979859484777518e-05,
"loss": 0.0575,
"step": 5500
},
{
"epoch": 1.39,
"learning_rate": 2.9751756440281033e-05,
"loss": 0.0334,
"step": 5510
},
{
"epoch": 1.4,
"learning_rate": 2.9704918032786887e-05,
"loss": 0.1308,
"step": 5520
},
{
"epoch": 1.4,
"learning_rate": 2.9658079625292738e-05,
"loss": 0.073,
"step": 5530
},
{
"epoch": 1.4,
"learning_rate": 2.9611241217798596e-05,
"loss": 0.0767,
"step": 5540
},
{
"epoch": 1.4,
"learning_rate": 2.9564402810304453e-05,
"loss": 0.1279,
"step": 5550
},
{
"epoch": 1.41,
"learning_rate": 2.9517564402810304e-05,
"loss": 0.0809,
"step": 5560
},
{
"epoch": 1.41,
"learning_rate": 2.9470725995316162e-05,
"loss": 0.0748,
"step": 5570
},
{
"epoch": 1.41,
"learning_rate": 2.9423887587822013e-05,
"loss": 0.032,
"step": 5580
},
{
"epoch": 1.41,
"learning_rate": 2.937704918032787e-05,
"loss": 0.1298,
"step": 5590
},
{
"epoch": 1.42,
"learning_rate": 2.9330210772833728e-05,
"loss": 0.133,
"step": 5600
},
{
"epoch": 1.42,
"learning_rate": 2.928337236533958e-05,
"loss": 0.0043,
"step": 5610
},
{
"epoch": 1.42,
"learning_rate": 2.9236533957845437e-05,
"loss": 0.0684,
"step": 5620
},
{
"epoch": 1.42,
"learning_rate": 2.9189695550351288e-05,
"loss": 0.0521,
"step": 5630
},
{
"epoch": 1.43,
"learning_rate": 2.9142857142857146e-05,
"loss": 0.066,
"step": 5640
},
{
"epoch": 1.43,
"learning_rate": 2.9096018735363e-05,
"loss": 0.066,
"step": 5650
},
{
"epoch": 1.43,
"learning_rate": 2.904918032786885e-05,
"loss": 0.0702,
"step": 5660
},
{
"epoch": 1.43,
"learning_rate": 2.900234192037471e-05,
"loss": 0.1099,
"step": 5670
},
{
"epoch": 1.44,
"learning_rate": 2.895550351288056e-05,
"loss": 0.192,
"step": 5680
},
{
"epoch": 1.44,
"learning_rate": 2.8908665105386417e-05,
"loss": 0.0809,
"step": 5690
},
{
"epoch": 1.44,
"learning_rate": 2.8861826697892275e-05,
"loss": 0.0728,
"step": 5700
},
{
"epoch": 1.44,
"learning_rate": 2.8814988290398126e-05,
"loss": 0.1393,
"step": 5710
},
{
"epoch": 1.45,
"learning_rate": 2.8768149882903983e-05,
"loss": 0.1557,
"step": 5720
},
{
"epoch": 1.45,
"learning_rate": 2.872131147540984e-05,
"loss": 0.059,
"step": 5730
},
{
"epoch": 1.45,
"learning_rate": 2.8674473067915692e-05,
"loss": 0.0598,
"step": 5740
},
{
"epoch": 1.45,
"learning_rate": 2.862763466042155e-05,
"loss": 0.2267,
"step": 5750
},
{
"epoch": 1.46,
"learning_rate": 2.85807962529274e-05,
"loss": 0.1091,
"step": 5760
},
{
"epoch": 1.46,
"learning_rate": 2.853395784543326e-05,
"loss": 0.0604,
"step": 5770
},
{
"epoch": 1.46,
"learning_rate": 2.8487119437939113e-05,
"loss": 0.1624,
"step": 5780
},
{
"epoch": 1.46,
"learning_rate": 2.8440281030444964e-05,
"loss": 0.0806,
"step": 5790
},
{
"epoch": 1.47,
"learning_rate": 2.839344262295082e-05,
"loss": 0.1126,
"step": 5800
},
{
"epoch": 1.47,
"learning_rate": 2.8346604215456672e-05,
"loss": 0.054,
"step": 5810
},
{
"epoch": 1.47,
"learning_rate": 2.829976580796253e-05,
"loss": 0.1462,
"step": 5820
},
{
"epoch": 1.47,
"learning_rate": 2.8252927400468388e-05,
"loss": 0.0856,
"step": 5830
},
{
"epoch": 1.48,
"learning_rate": 2.820608899297424e-05,
"loss": 0.1068,
"step": 5840
},
{
"epoch": 1.48,
"learning_rate": 2.8159250585480096e-05,
"loss": 0.1461,
"step": 5850
},
{
"epoch": 1.48,
"learning_rate": 2.8112412177985947e-05,
"loss": 0.0388,
"step": 5860
},
{
"epoch": 1.48,
"learning_rate": 2.8065573770491805e-05,
"loss": 0.039,
"step": 5870
},
{
"epoch": 1.49,
"learning_rate": 2.8018735362997663e-05,
"loss": 0.0712,
"step": 5880
},
{
"epoch": 1.49,
"learning_rate": 2.7971896955503514e-05,
"loss": 0.1425,
"step": 5890
},
{
"epoch": 1.49,
"learning_rate": 2.792505854800937e-05,
"loss": 0.0445,
"step": 5900
},
{
"epoch": 1.49,
"learning_rate": 2.7878220140515226e-05,
"loss": 0.0095,
"step": 5910
},
{
"epoch": 1.5,
"learning_rate": 2.7831381733021076e-05,
"loss": 0.1108,
"step": 5920
},
{
"epoch": 1.5,
"learning_rate": 2.7784543325526934e-05,
"loss": 0.0579,
"step": 5930
},
{
"epoch": 1.5,
"learning_rate": 2.7737704918032785e-05,
"loss": 0.0711,
"step": 5940
},
{
"epoch": 1.5,
"learning_rate": 2.7690866510538643e-05,
"loss": 0.1386,
"step": 5950
},
{
"epoch": 1.51,
"learning_rate": 2.76440281030445e-05,
"loss": 0.0443,
"step": 5960
},
{
"epoch": 1.51,
"learning_rate": 2.759718969555035e-05,
"loss": 0.04,
"step": 5970
},
{
"epoch": 1.51,
"learning_rate": 2.755035128805621e-05,
"loss": 0.1138,
"step": 5980
},
{
"epoch": 1.51,
"learning_rate": 2.750351288056206e-05,
"loss": 0.0534,
"step": 5990
},
{
"epoch": 1.52,
"learning_rate": 2.7456674473067918e-05,
"loss": 0.0334,
"step": 6000
},
{
"epoch": 1.52,
"learning_rate": 2.7409836065573775e-05,
"loss": 0.0433,
"step": 6010
},
{
"epoch": 1.52,
"learning_rate": 2.7362997658079626e-05,
"loss": 0.0974,
"step": 6020
},
{
"epoch": 1.53,
"learning_rate": 2.7316159250585484e-05,
"loss": 0.0935,
"step": 6030
},
{
"epoch": 1.53,
"learning_rate": 2.7269320843091335e-05,
"loss": 0.1055,
"step": 6040
},
{
"epoch": 1.53,
"learning_rate": 2.722248243559719e-05,
"loss": 0.0861,
"step": 6050
},
{
"epoch": 1.53,
"learning_rate": 2.7175644028103047e-05,
"loss": 0.1138,
"step": 6060
},
{
"epoch": 1.54,
"learning_rate": 2.7128805620608898e-05,
"loss": 0.0759,
"step": 6070
},
{
"epoch": 1.54,
"learning_rate": 2.7081967213114756e-05,
"loss": 0.1066,
"step": 6080
},
{
"epoch": 1.54,
"learning_rate": 2.7035128805620607e-05,
"loss": 0.0256,
"step": 6090
},
{
"epoch": 1.54,
"learning_rate": 2.6988290398126464e-05,
"loss": 0.0746,
"step": 6100
},
{
"epoch": 1.55,
"learning_rate": 2.6941451990632322e-05,
"loss": 0.1099,
"step": 6110
},
{
"epoch": 1.55,
"learning_rate": 2.6894613583138173e-05,
"loss": 0.0847,
"step": 6120
},
{
"epoch": 1.55,
"learning_rate": 2.684777517564403e-05,
"loss": 0.0987,
"step": 6130
},
{
"epoch": 1.55,
"learning_rate": 2.6800936768149888e-05,
"loss": 0.0433,
"step": 6140
},
{
"epoch": 1.56,
"learning_rate": 2.675409836065574e-05,
"loss": 0.0269,
"step": 6150
},
{
"epoch": 1.56,
"learning_rate": 2.6707259953161597e-05,
"loss": 0.0517,
"step": 6160
},
{
"epoch": 1.56,
"learning_rate": 2.6660421545667448e-05,
"loss": 0.0493,
"step": 6170
},
{
"epoch": 1.56,
"learning_rate": 2.6613583138173302e-05,
"loss": 0.0394,
"step": 6180
},
{
"epoch": 1.57,
"learning_rate": 2.656674473067916e-05,
"loss": 0.0066,
"step": 6190
},
{
"epoch": 1.57,
"learning_rate": 2.651990632318501e-05,
"loss": 0.0695,
"step": 6200
},
{
"epoch": 1.57,
"learning_rate": 2.647306791569087e-05,
"loss": 0.0687,
"step": 6210
},
{
"epoch": 1.57,
"learning_rate": 2.642622950819672e-05,
"loss": 0.0875,
"step": 6220
},
{
"epoch": 1.58,
"learning_rate": 2.6379391100702577e-05,
"loss": 0.1237,
"step": 6230
},
{
"epoch": 1.58,
"learning_rate": 2.6332552693208435e-05,
"loss": 0.0286,
"step": 6240
},
{
"epoch": 1.58,
"learning_rate": 2.6285714285714286e-05,
"loss": 0.0732,
"step": 6250
},
{
"epoch": 1.58,
"learning_rate": 2.6238875878220143e-05,
"loss": 0.098,
"step": 6260
},
{
"epoch": 1.59,
"learning_rate": 2.6192037470725994e-05,
"loss": 0.0726,
"step": 6270
},
{
"epoch": 1.59,
"learning_rate": 2.6145199063231852e-05,
"loss": 0.0238,
"step": 6280
},
{
"epoch": 1.59,
"learning_rate": 2.6098360655737706e-05,
"loss": 0.0508,
"step": 6290
},
{
"epoch": 1.59,
"learning_rate": 2.605152224824356e-05,
"loss": 0.1049,
"step": 6300
},
{
"epoch": 1.6,
"learning_rate": 2.6004683840749415e-05,
"loss": 0.0545,
"step": 6310
},
{
"epoch": 1.6,
"learning_rate": 2.595784543325527e-05,
"loss": 0.0181,
"step": 6320
},
{
"epoch": 1.6,
"learning_rate": 2.5911007025761124e-05,
"loss": 0.0747,
"step": 6330
},
{
"epoch": 1.6,
"learning_rate": 2.586416861826698e-05,
"loss": 0.0279,
"step": 6340
},
{
"epoch": 1.61,
"learning_rate": 2.5817330210772832e-05,
"loss": 0.0956,
"step": 6350
},
{
"epoch": 1.61,
"learning_rate": 2.577049180327869e-05,
"loss": 0.1043,
"step": 6360
},
{
"epoch": 1.61,
"learning_rate": 2.5723653395784548e-05,
"loss": 0.1176,
"step": 6370
},
{
"epoch": 1.61,
"learning_rate": 2.56768149882904e-05,
"loss": 0.0528,
"step": 6380
},
{
"epoch": 1.62,
"learning_rate": 2.5629976580796256e-05,
"loss": 0.0567,
"step": 6390
},
{
"epoch": 1.62,
"learning_rate": 2.5583138173302107e-05,
"loss": 0.1121,
"step": 6400
},
{
"epoch": 1.62,
"learning_rate": 2.5536299765807965e-05,
"loss": 0.0709,
"step": 6410
},
{
"epoch": 1.62,
"learning_rate": 2.548946135831382e-05,
"loss": 0.0264,
"step": 6420
},
{
"epoch": 1.63,
"learning_rate": 2.5442622950819674e-05,
"loss": 0.1369,
"step": 6430
},
{
"epoch": 1.63,
"learning_rate": 2.5395784543325528e-05,
"loss": 0.1269,
"step": 6440
},
{
"epoch": 1.63,
"learning_rate": 2.5348946135831382e-05,
"loss": 0.0212,
"step": 6450
},
{
"epoch": 1.63,
"learning_rate": 2.5302107728337236e-05,
"loss": 0.0204,
"step": 6460
},
{
"epoch": 1.64,
"learning_rate": 2.5255269320843094e-05,
"loss": 0.0515,
"step": 6470
},
{
"epoch": 1.64,
"learning_rate": 2.5208430913348945e-05,
"loss": 0.1161,
"step": 6480
},
{
"epoch": 1.64,
"learning_rate": 2.5161592505854803e-05,
"loss": 0.187,
"step": 6490
},
{
"epoch": 1.64,
"learning_rate": 2.5114754098360654e-05,
"loss": 0.0829,
"step": 6500
},
{
"epoch": 1.65,
"learning_rate": 2.506791569086651e-05,
"loss": 0.0383,
"step": 6510
},
{
"epoch": 1.65,
"learning_rate": 2.502107728337237e-05,
"loss": 0.0712,
"step": 6520
},
{
"epoch": 1.65,
"learning_rate": 2.4974238875878223e-05,
"loss": 0.0942,
"step": 6530
},
{
"epoch": 1.65,
"learning_rate": 2.4927400468384078e-05,
"loss": 0.0992,
"step": 6540
},
{
"epoch": 1.66,
"learning_rate": 2.4880562060889932e-05,
"loss": 0.0534,
"step": 6550
},
{
"epoch": 1.66,
"learning_rate": 2.4833723653395786e-05,
"loss": 0.044,
"step": 6560
},
{
"epoch": 1.66,
"learning_rate": 2.478688524590164e-05,
"loss": 0.0783,
"step": 6570
},
{
"epoch": 1.66,
"learning_rate": 2.4740046838407495e-05,
"loss": 0.0383,
"step": 6580
},
{
"epoch": 1.67,
"learning_rate": 2.469320843091335e-05,
"loss": 0.1166,
"step": 6590
},
{
"epoch": 1.67,
"learning_rate": 2.4646370023419204e-05,
"loss": 0.0792,
"step": 6600
},
{
"epoch": 1.67,
"learning_rate": 2.4599531615925058e-05,
"loss": 0.0898,
"step": 6610
},
{
"epoch": 1.67,
"learning_rate": 2.4552693208430916e-05,
"loss": 0.0817,
"step": 6620
},
{
"epoch": 1.68,
"learning_rate": 2.450585480093677e-05,
"loss": 0.0572,
"step": 6630
},
{
"epoch": 1.68,
"learning_rate": 2.4459016393442624e-05,
"loss": 0.0857,
"step": 6640
},
{
"epoch": 1.68,
"learning_rate": 2.441217798594848e-05,
"loss": 0.0917,
"step": 6650
},
{
"epoch": 1.68,
"learning_rate": 2.4365339578454333e-05,
"loss": 0.0563,
"step": 6660
},
{
"epoch": 1.69,
"learning_rate": 2.431850117096019e-05,
"loss": 0.0392,
"step": 6670
},
{
"epoch": 1.69,
"learning_rate": 2.4271662763466045e-05,
"loss": 0.0425,
"step": 6680
},
{
"epoch": 1.69,
"learning_rate": 2.42248243559719e-05,
"loss": 0.0214,
"step": 6690
},
{
"epoch": 1.69,
"learning_rate": 2.4177985948477754e-05,
"loss": 0.0584,
"step": 6700
},
{
"epoch": 1.7,
"learning_rate": 2.4131147540983608e-05,
"loss": 0.0946,
"step": 6710
},
{
"epoch": 1.7,
"learning_rate": 2.4084309133489462e-05,
"loss": 0.1131,
"step": 6720
},
{
"epoch": 1.7,
"learning_rate": 2.4037470725995316e-05,
"loss": 0.1002,
"step": 6730
},
{
"epoch": 1.7,
"learning_rate": 2.399063231850117e-05,
"loss": 0.0329,
"step": 6740
},
{
"epoch": 1.71,
"learning_rate": 2.3943793911007025e-05,
"loss": 0.0447,
"step": 6750
},
{
"epoch": 1.71,
"learning_rate": 2.3896955503512883e-05,
"loss": 0.042,
"step": 6760
},
{
"epoch": 1.71,
"learning_rate": 2.3850117096018737e-05,
"loss": 0.0726,
"step": 6770
},
{
"epoch": 1.71,
"learning_rate": 2.380327868852459e-05,
"loss": 0.073,
"step": 6780
},
{
"epoch": 1.72,
"learning_rate": 2.3756440281030446e-05,
"loss": 0.0694,
"step": 6790
},
{
"epoch": 1.72,
"learning_rate": 2.37096018735363e-05,
"loss": 0.0517,
"step": 6800
},
{
"epoch": 1.72,
"learning_rate": 2.3662763466042158e-05,
"loss": 0.0055,
"step": 6810
},
{
"epoch": 1.72,
"learning_rate": 2.3615925058548012e-05,
"loss": 0.0549,
"step": 6820
},
{
"epoch": 1.73,
"learning_rate": 2.3569086651053866e-05,
"loss": 0.0339,
"step": 6830
},
{
"epoch": 1.73,
"learning_rate": 2.352224824355972e-05,
"loss": 0.0534,
"step": 6840
},
{
"epoch": 1.73,
"learning_rate": 2.3475409836065575e-05,
"loss": 0.0211,
"step": 6850
},
{
"epoch": 1.73,
"learning_rate": 2.342857142857143e-05,
"loss": 0.0492,
"step": 6860
},
{
"epoch": 1.74,
"learning_rate": 2.3381733021077284e-05,
"loss": 0.0159,
"step": 6870
},
{
"epoch": 1.74,
"learning_rate": 2.3334894613583138e-05,
"loss": 0.0215,
"step": 6880
},
{
"epoch": 1.74,
"learning_rate": 2.3288056206088992e-05,
"loss": 0.0357,
"step": 6890
},
{
"epoch": 1.75,
"learning_rate": 2.324121779859485e-05,
"loss": 0.0914,
"step": 6900
},
{
"epoch": 1.75,
"learning_rate": 2.3194379391100704e-05,
"loss": 0.0068,
"step": 6910
},
{
"epoch": 1.75,
"learning_rate": 2.314754098360656e-05,
"loss": 0.1821,
"step": 6920
},
{
"epoch": 1.75,
"learning_rate": 2.3100702576112413e-05,
"loss": 0.0757,
"step": 6930
},
{
"epoch": 1.76,
"learning_rate": 2.305386416861827e-05,
"loss": 0.0274,
"step": 6940
},
{
"epoch": 1.76,
"learning_rate": 2.3007025761124125e-05,
"loss": 0.0484,
"step": 6950
},
{
"epoch": 1.76,
"learning_rate": 2.296018735362998e-05,
"loss": 0.0868,
"step": 6960
},
{
"epoch": 1.76,
"learning_rate": 2.2913348946135834e-05,
"loss": 0.0724,
"step": 6970
},
{
"epoch": 1.77,
"learning_rate": 2.2866510538641684e-05,
"loss": 0.0994,
"step": 6980
},
{
"epoch": 1.77,
"learning_rate": 2.2819672131147542e-05,
"loss": 0.0764,
"step": 6990
},
{
"epoch": 1.77,
"learning_rate": 2.2772833723653396e-05,
"loss": 0.0533,
"step": 7000
},
{
"epoch": 1.77,
"learning_rate": 2.272599531615925e-05,
"loss": 0.036,
"step": 7010
},
{
"epoch": 1.78,
"learning_rate": 2.2679156908665105e-05,
"loss": 0.0348,
"step": 7020
},
{
"epoch": 1.78,
"learning_rate": 2.2632318501170963e-05,
"loss": 0.1071,
"step": 7030
},
{
"epoch": 1.78,
"learning_rate": 2.2585480093676817e-05,
"loss": 0.0409,
"step": 7040
},
{
"epoch": 1.78,
"learning_rate": 2.253864168618267e-05,
"loss": 0.0277,
"step": 7050
},
{
"epoch": 1.79,
"learning_rate": 2.2491803278688526e-05,
"loss": 0.0324,
"step": 7060
},
{
"epoch": 1.79,
"learning_rate": 2.244496487119438e-05,
"loss": 0.0725,
"step": 7070
},
{
"epoch": 1.79,
"learning_rate": 2.2398126463700238e-05,
"loss": 0.1659,
"step": 7080
},
{
"epoch": 1.79,
"learning_rate": 2.2351288056206092e-05,
"loss": 0.0501,
"step": 7090
},
{
"epoch": 1.8,
"learning_rate": 2.2304449648711946e-05,
"loss": 0.11,
"step": 7100
},
{
"epoch": 1.8,
"learning_rate": 2.2257611241217797e-05,
"loss": 0.0126,
"step": 7110
},
{
"epoch": 1.8,
"learning_rate": 2.221077283372365e-05,
"loss": 0.158,
"step": 7120
},
{
"epoch": 1.8,
"learning_rate": 2.216393442622951e-05,
"loss": 0.0884,
"step": 7130
},
{
"epoch": 1.81,
"learning_rate": 2.2117096018735364e-05,
"loss": 0.0974,
"step": 7140
},
{
"epoch": 1.81,
"learning_rate": 2.2070257611241218e-05,
"loss": 0.0859,
"step": 7150
},
{
"epoch": 1.81,
"learning_rate": 2.2023419203747072e-05,
"loss": 0.0063,
"step": 7160
},
{
"epoch": 1.81,
"learning_rate": 2.197658079625293e-05,
"loss": 0.0762,
"step": 7170
},
{
"epoch": 1.82,
"learning_rate": 2.1929742388758784e-05,
"loss": 0.0412,
"step": 7180
},
{
"epoch": 1.82,
"learning_rate": 2.188290398126464e-05,
"loss": 0.0676,
"step": 7190
},
{
"epoch": 1.82,
"learning_rate": 2.1836065573770493e-05,
"loss": 0.0028,
"step": 7200
},
{
"epoch": 1.82,
"learning_rate": 2.1789227166276347e-05,
"loss": 0.0158,
"step": 7210
},
{
"epoch": 1.83,
"learning_rate": 2.1742388758782205e-05,
"loss": 0.0685,
"step": 7220
},
{
"epoch": 1.83,
"learning_rate": 2.169555035128806e-05,
"loss": 0.0489,
"step": 7230
},
{
"epoch": 1.83,
"learning_rate": 2.164871194379391e-05,
"loss": 0.0435,
"step": 7240
},
{
"epoch": 1.83,
"learning_rate": 2.1601873536299764e-05,
"loss": 0.0959,
"step": 7250
},
{
"epoch": 1.84,
"learning_rate": 2.1555035128805622e-05,
"loss": 0.0644,
"step": 7260
},
{
"epoch": 1.84,
"learning_rate": 2.1508196721311476e-05,
"loss": 0.0645,
"step": 7270
},
{
"epoch": 1.84,
"learning_rate": 2.146135831381733e-05,
"loss": 0.0473,
"step": 7280
},
{
"epoch": 1.84,
"learning_rate": 2.1414519906323185e-05,
"loss": 0.0449,
"step": 7290
},
{
"epoch": 1.85,
"learning_rate": 2.136768149882904e-05,
"loss": 0.1278,
"step": 7300
},
{
"epoch": 1.85,
"learning_rate": 2.1320843091334897e-05,
"loss": 0.0487,
"step": 7310
},
{
"epoch": 1.85,
"learning_rate": 2.127400468384075e-05,
"loss": 0.051,
"step": 7320
},
{
"epoch": 1.85,
"learning_rate": 2.1227166276346606e-05,
"loss": 0.0455,
"step": 7330
},
{
"epoch": 1.86,
"learning_rate": 2.118032786885246e-05,
"loss": 0.0938,
"step": 7340
},
{
"epoch": 1.86,
"learning_rate": 2.1133489461358318e-05,
"loss": 0.0143,
"step": 7350
},
{
"epoch": 1.86,
"learning_rate": 2.108665105386417e-05,
"loss": 0.0204,
"step": 7360
},
{
"epoch": 1.86,
"learning_rate": 2.1039812646370023e-05,
"loss": 0.0121,
"step": 7370
},
{
"epoch": 1.87,
"learning_rate": 2.0992974238875877e-05,
"loss": 0.0051,
"step": 7380
},
{
"epoch": 1.87,
"learning_rate": 2.094613583138173e-05,
"loss": 0.1138,
"step": 7390
},
{
"epoch": 1.87,
"learning_rate": 2.089929742388759e-05,
"loss": 0.0192,
"step": 7400
},
{
"epoch": 1.87,
"learning_rate": 2.0852459016393444e-05,
"loss": 0.041,
"step": 7410
},
{
"epoch": 1.88,
"learning_rate": 2.0805620608899298e-05,
"loss": 0.0657,
"step": 7420
},
{
"epoch": 1.88,
"learning_rate": 2.0758782201405152e-05,
"loss": 0.0065,
"step": 7430
},
{
"epoch": 1.88,
"learning_rate": 2.0711943793911007e-05,
"loss": 0.0425,
"step": 7440
},
{
"epoch": 1.88,
"learning_rate": 2.0665105386416864e-05,
"loss": 0.0394,
"step": 7450
},
{
"epoch": 1.89,
"learning_rate": 2.061826697892272e-05,
"loss": 0.0318,
"step": 7460
},
{
"epoch": 1.89,
"learning_rate": 2.0571428571428573e-05,
"loss": 0.0903,
"step": 7470
},
{
"epoch": 1.89,
"learning_rate": 2.0524590163934427e-05,
"loss": 0.0317,
"step": 7480
},
{
"epoch": 1.89,
"learning_rate": 2.047775175644028e-05,
"loss": 0.0366,
"step": 7490
},
{
"epoch": 1.9,
"learning_rate": 2.0430913348946136e-05,
"loss": 0.0685,
"step": 7500
},
{
"epoch": 1.9,
"learning_rate": 2.038407494145199e-05,
"loss": 0.1157,
"step": 7510
},
{
"epoch": 1.9,
"learning_rate": 2.0337236533957844e-05,
"loss": 0.0078,
"step": 7520
},
{
"epoch": 1.9,
"learning_rate": 2.02903981264637e-05,
"loss": 0.0385,
"step": 7530
},
{
"epoch": 1.91,
"learning_rate": 2.0243559718969556e-05,
"loss": 0.0168,
"step": 7540
},
{
"epoch": 1.91,
"learning_rate": 2.019672131147541e-05,
"loss": 0.0636,
"step": 7550
},
{
"epoch": 1.91,
"learning_rate": 2.0149882903981265e-05,
"loss": 0.107,
"step": 7560
},
{
"epoch": 1.91,
"learning_rate": 2.010304449648712e-05,
"loss": 0.0373,
"step": 7570
},
{
"epoch": 1.92,
"learning_rate": 2.0056206088992977e-05,
"loss": 0.0654,
"step": 7580
},
{
"epoch": 1.92,
"learning_rate": 2.000936768149883e-05,
"loss": 0.128,
"step": 7590
},
{
"epoch": 1.92,
"learning_rate": 1.9962529274004686e-05,
"loss": 0.0439,
"step": 7600
},
{
"epoch": 1.92,
"learning_rate": 1.991569086651054e-05,
"loss": 0.0216,
"step": 7610
},
{
"epoch": 1.93,
"learning_rate": 1.9868852459016394e-05,
"loss": 0.0739,
"step": 7620
},
{
"epoch": 1.93,
"learning_rate": 1.982201405152225e-05,
"loss": 0.0074,
"step": 7630
},
{
"epoch": 1.93,
"learning_rate": 1.9775175644028103e-05,
"loss": 0.0444,
"step": 7640
},
{
"epoch": 1.93,
"learning_rate": 1.9728337236533957e-05,
"loss": 0.0372,
"step": 7650
},
{
"epoch": 1.94,
"learning_rate": 1.968149882903981e-05,
"loss": 0.0238,
"step": 7660
},
{
"epoch": 1.94,
"learning_rate": 1.963466042154567e-05,
"loss": 0.0205,
"step": 7670
},
{
"epoch": 1.94,
"learning_rate": 1.9587822014051524e-05,
"loss": 0.0326,
"step": 7680
},
{
"epoch": 1.94,
"learning_rate": 1.9540983606557378e-05,
"loss": 0.1102,
"step": 7690
},
{
"epoch": 1.95,
"learning_rate": 1.9494145199063232e-05,
"loss": 0.1375,
"step": 7700
},
{
"epoch": 1.95,
"learning_rate": 1.9447306791569087e-05,
"loss": 0.0985,
"step": 7710
},
{
"epoch": 1.95,
"learning_rate": 1.9400468384074944e-05,
"loss": 0.0125,
"step": 7720
},
{
"epoch": 1.95,
"learning_rate": 1.93536299765808e-05,
"loss": 0.0485,
"step": 7730
},
{
"epoch": 1.96,
"learning_rate": 1.9306791569086653e-05,
"loss": 0.0162,
"step": 7740
},
{
"epoch": 1.96,
"learning_rate": 1.9259953161592507e-05,
"loss": 0.1309,
"step": 7750
},
{
"epoch": 1.96,
"learning_rate": 1.921311475409836e-05,
"loss": 0.0619,
"step": 7760
},
{
"epoch": 1.97,
"learning_rate": 1.9166276346604216e-05,
"loss": 0.0377,
"step": 7770
},
{
"epoch": 1.97,
"learning_rate": 1.911943793911007e-05,
"loss": 0.0088,
"step": 7780
},
{
"epoch": 1.97,
"learning_rate": 1.9072599531615924e-05,
"loss": 0.0382,
"step": 7790
},
{
"epoch": 1.97,
"learning_rate": 1.902576112412178e-05,
"loss": 0.0893,
"step": 7800
},
{
"epoch": 1.98,
"learning_rate": 1.8978922716627637e-05,
"loss": 0.1147,
"step": 7810
},
{
"epoch": 1.98,
"learning_rate": 1.893208430913349e-05,
"loss": 0.0552,
"step": 7820
},
{
"epoch": 1.98,
"learning_rate": 1.8885245901639345e-05,
"loss": 0.0784,
"step": 7830
},
{
"epoch": 1.98,
"learning_rate": 1.88384074941452e-05,
"loss": 0.0221,
"step": 7840
},
{
"epoch": 1.99,
"learning_rate": 1.8791569086651054e-05,
"loss": 0.0459,
"step": 7850
},
{
"epoch": 1.99,
"learning_rate": 1.874473067915691e-05,
"loss": 0.0612,
"step": 7860
},
{
"epoch": 1.99,
"learning_rate": 1.8697892271662766e-05,
"loss": 0.1254,
"step": 7870
},
{
"epoch": 1.99,
"learning_rate": 1.865105386416862e-05,
"loss": 0.0934,
"step": 7880
},
{
"epoch": 2.0,
"learning_rate": 1.8604215456674474e-05,
"loss": 0.0872,
"step": 7890
},
{
"epoch": 2.0,
"learning_rate": 1.855737704918033e-05,
"loss": 0.0536,
"step": 7900
},
{
"epoch": 2.0,
"eval_accuracy": 0.9965860597439545,
"eval_loss": 0.010263873264193535,
"eval_runtime": 82.4603,
"eval_samples_per_second": 85.253,
"eval_steps_per_second": 21.319,
"step": 7908
},
{
"epoch": 2.0,
"learning_rate": 1.8510538641686183e-05,
"loss": 0.0841,
"step": 7910
},
{
"epoch": 2.0,
"learning_rate": 1.8463700234192037e-05,
"loss": 0.0219,
"step": 7920
},
{
"epoch": 2.01,
"learning_rate": 1.841686182669789e-05,
"loss": 0.0294,
"step": 7930
},
{
"epoch": 2.01,
"learning_rate": 1.8370023419203746e-05,
"loss": 0.003,
"step": 7940
},
{
"epoch": 2.01,
"learning_rate": 1.8323185011709604e-05,
"loss": 0.0335,
"step": 7950
},
{
"epoch": 2.01,
"learning_rate": 1.8276346604215458e-05,
"loss": 0.0244,
"step": 7960
},
{
"epoch": 2.02,
"learning_rate": 1.8229508196721312e-05,
"loss": 0.144,
"step": 7970
},
{
"epoch": 2.02,
"learning_rate": 1.8182669789227167e-05,
"loss": 0.0732,
"step": 7980
},
{
"epoch": 2.02,
"learning_rate": 1.8135831381733024e-05,
"loss": 0.1172,
"step": 7990
},
{
"epoch": 2.02,
"learning_rate": 1.808899297423888e-05,
"loss": 0.0084,
"step": 8000
},
{
"epoch": 2.03,
"learning_rate": 1.8042154566744733e-05,
"loss": 0.0499,
"step": 8010
},
{
"epoch": 2.03,
"learning_rate": 1.7995316159250587e-05,
"loss": 0.0586,
"step": 8020
},
{
"epoch": 2.03,
"learning_rate": 1.794847775175644e-05,
"loss": 0.0093,
"step": 8030
},
{
"epoch": 2.03,
"learning_rate": 1.7901639344262296e-05,
"loss": 0.0332,
"step": 8040
},
{
"epoch": 2.04,
"learning_rate": 1.785480093676815e-05,
"loss": 0.0222,
"step": 8050
},
{
"epoch": 2.04,
"learning_rate": 1.7807962529274004e-05,
"loss": 0.0778,
"step": 8060
},
{
"epoch": 2.04,
"learning_rate": 1.776112412177986e-05,
"loss": 0.005,
"step": 8070
},
{
"epoch": 2.04,
"learning_rate": 1.7714285714285713e-05,
"loss": 0.0826,
"step": 8080
},
{
"epoch": 2.05,
"learning_rate": 1.766744730679157e-05,
"loss": 0.0115,
"step": 8090
},
{
"epoch": 2.05,
"learning_rate": 1.7620608899297425e-05,
"loss": 0.097,
"step": 8100
},
{
"epoch": 2.05,
"learning_rate": 1.757377049180328e-05,
"loss": 0.0841,
"step": 8110
},
{
"epoch": 2.05,
"learning_rate": 1.7526932084309134e-05,
"loss": 0.0622,
"step": 8120
},
{
"epoch": 2.06,
"learning_rate": 1.748009367681499e-05,
"loss": 0.0221,
"step": 8130
},
{
"epoch": 2.06,
"learning_rate": 1.7433255269320846e-05,
"loss": 0.045,
"step": 8140
},
{
"epoch": 2.06,
"learning_rate": 1.73864168618267e-05,
"loss": 0.0541,
"step": 8150
},
{
"epoch": 2.06,
"learning_rate": 1.7339578454332554e-05,
"loss": 0.0427,
"step": 8160
},
{
"epoch": 2.07,
"learning_rate": 1.729274004683841e-05,
"loss": 0.0042,
"step": 8170
},
{
"epoch": 2.07,
"learning_rate": 1.7245901639344263e-05,
"loss": 0.0184,
"step": 8180
},
{
"epoch": 2.07,
"learning_rate": 1.7199063231850117e-05,
"loss": 0.0066,
"step": 8190
},
{
"epoch": 2.07,
"learning_rate": 1.715222482435597e-05,
"loss": 0.0243,
"step": 8200
},
{
"epoch": 2.08,
"learning_rate": 1.7105386416861826e-05,
"loss": 0.0916,
"step": 8210
},
{
"epoch": 2.08,
"learning_rate": 1.7058548009367684e-05,
"loss": 0.0072,
"step": 8220
},
{
"epoch": 2.08,
"learning_rate": 1.7011709601873538e-05,
"loss": 0.0158,
"step": 8230
},
{
"epoch": 2.08,
"learning_rate": 1.6964871194379392e-05,
"loss": 0.0384,
"step": 8240
},
{
"epoch": 2.09,
"learning_rate": 1.6918032786885247e-05,
"loss": 0.0378,
"step": 8250
},
{
"epoch": 2.09,
"learning_rate": 1.68711943793911e-05,
"loss": 0.0048,
"step": 8260
},
{
"epoch": 2.09,
"learning_rate": 1.682435597189696e-05,
"loss": 0.0492,
"step": 8270
},
{
"epoch": 2.09,
"learning_rate": 1.6777517564402813e-05,
"loss": 0.071,
"step": 8280
},
{
"epoch": 2.1,
"learning_rate": 1.6730679156908667e-05,
"loss": 0.0199,
"step": 8290
},
{
"epoch": 2.1,
"learning_rate": 1.668384074941452e-05,
"loss": 0.0944,
"step": 8300
},
{
"epoch": 2.1,
"learning_rate": 1.6637002341920376e-05,
"loss": 0.0051,
"step": 8310
},
{
"epoch": 2.1,
"learning_rate": 1.659016393442623e-05,
"loss": 0.0543,
"step": 8320
},
{
"epoch": 2.11,
"learning_rate": 1.6543325526932085e-05,
"loss": 0.0516,
"step": 8330
},
{
"epoch": 2.11,
"learning_rate": 1.649648711943794e-05,
"loss": 0.0348,
"step": 8340
},
{
"epoch": 2.11,
"learning_rate": 1.6449648711943793e-05,
"loss": 0.0219,
"step": 8350
},
{
"epoch": 2.11,
"learning_rate": 1.640281030444965e-05,
"loss": 0.0551,
"step": 8360
},
{
"epoch": 2.12,
"learning_rate": 1.6355971896955505e-05,
"loss": 0.0719,
"step": 8370
},
{
"epoch": 2.12,
"learning_rate": 1.630913348946136e-05,
"loss": 0.022,
"step": 8380
},
{
"epoch": 2.12,
"learning_rate": 1.6262295081967214e-05,
"loss": 0.0133,
"step": 8390
},
{
"epoch": 2.12,
"learning_rate": 1.6215456674473068e-05,
"loss": 0.0689,
"step": 8400
},
{
"epoch": 2.13,
"learning_rate": 1.6168618266978926e-05,
"loss": 0.157,
"step": 8410
},
{
"epoch": 2.13,
"learning_rate": 1.612177985948478e-05,
"loss": 0.0191,
"step": 8420
},
{
"epoch": 2.13,
"learning_rate": 1.6074941451990634e-05,
"loss": 0.1239,
"step": 8430
},
{
"epoch": 2.13,
"learning_rate": 1.6028103044496485e-05,
"loss": 0.0393,
"step": 8440
},
{
"epoch": 2.14,
"learning_rate": 1.5981264637002343e-05,
"loss": 0.0065,
"step": 8450
},
{
"epoch": 2.14,
"learning_rate": 1.5934426229508197e-05,
"loss": 0.0633,
"step": 8460
},
{
"epoch": 2.14,
"learning_rate": 1.588758782201405e-05,
"loss": 0.0086,
"step": 8470
},
{
"epoch": 2.14,
"learning_rate": 1.5840749414519906e-05,
"loss": 0.1167,
"step": 8480
},
{
"epoch": 2.15,
"learning_rate": 1.579391100702576e-05,
"loss": 0.0431,
"step": 8490
},
{
"epoch": 2.15,
"learning_rate": 1.5747072599531618e-05,
"loss": 0.0536,
"step": 8500
},
{
"epoch": 2.15,
"learning_rate": 1.5700234192037472e-05,
"loss": 0.0374,
"step": 8510
},
{
"epoch": 2.15,
"learning_rate": 1.5653395784543327e-05,
"loss": 0.0155,
"step": 8520
},
{
"epoch": 2.16,
"learning_rate": 1.560655737704918e-05,
"loss": 0.1479,
"step": 8530
},
{
"epoch": 2.16,
"learning_rate": 1.555971896955504e-05,
"loss": 0.0067,
"step": 8540
},
{
"epoch": 2.16,
"learning_rate": 1.5512880562060893e-05,
"loss": 0.0769,
"step": 8550
},
{
"epoch": 2.16,
"learning_rate": 1.5466042154566744e-05,
"loss": 0.0146,
"step": 8560
},
{
"epoch": 2.17,
"learning_rate": 1.5419203747072598e-05,
"loss": 0.096,
"step": 8570
},
{
"epoch": 2.17,
"learning_rate": 1.5372365339578452e-05,
"loss": 0.113,
"step": 8580
},
{
"epoch": 2.17,
"learning_rate": 1.532552693208431e-05,
"loss": 0.2759,
"step": 8590
},
{
"epoch": 2.18,
"learning_rate": 1.5278688524590165e-05,
"loss": 0.0936,
"step": 8600
},
{
"epoch": 2.18,
"learning_rate": 1.5231850117096019e-05,
"loss": 0.0129,
"step": 8610
},
{
"epoch": 2.18,
"learning_rate": 1.5185011709601873e-05,
"loss": 0.0076,
"step": 8620
},
{
"epoch": 2.18,
"learning_rate": 1.513817330210773e-05,
"loss": 0.0052,
"step": 8630
},
{
"epoch": 2.19,
"learning_rate": 1.5091334894613585e-05,
"loss": 0.0785,
"step": 8640
},
{
"epoch": 2.19,
"learning_rate": 1.504449648711944e-05,
"loss": 0.0083,
"step": 8650
},
{
"epoch": 2.19,
"learning_rate": 1.4997658079625294e-05,
"loss": 0.0651,
"step": 8660
},
{
"epoch": 2.19,
"learning_rate": 1.4950819672131146e-05,
"loss": 0.0776,
"step": 8670
},
{
"epoch": 2.2,
"learning_rate": 1.4903981264637004e-05,
"loss": 0.0136,
"step": 8680
},
{
"epoch": 2.2,
"learning_rate": 1.4857142857142858e-05,
"loss": 0.0393,
"step": 8690
},
{
"epoch": 2.2,
"learning_rate": 1.4810304449648713e-05,
"loss": 0.0994,
"step": 8700
},
{
"epoch": 2.2,
"learning_rate": 1.4763466042154567e-05,
"loss": 0.0303,
"step": 8710
},
{
"epoch": 2.21,
"learning_rate": 1.4716627634660421e-05,
"loss": 0.0166,
"step": 8720
},
{
"epoch": 2.21,
"learning_rate": 1.4669789227166277e-05,
"loss": 0.1027,
"step": 8730
},
{
"epoch": 2.21,
"learning_rate": 1.4622950819672132e-05,
"loss": 0.0044,
"step": 8740
},
{
"epoch": 2.21,
"learning_rate": 1.4576112412177986e-05,
"loss": 0.0051,
"step": 8750
},
{
"epoch": 2.22,
"learning_rate": 1.452927400468384e-05,
"loss": 0.0053,
"step": 8760
},
{
"epoch": 2.22,
"learning_rate": 1.4482435597189698e-05,
"loss": 0.0528,
"step": 8770
},
{
"epoch": 2.22,
"learning_rate": 1.4435597189695552e-05,
"loss": 0.0293,
"step": 8780
},
{
"epoch": 2.22,
"learning_rate": 1.4388758782201407e-05,
"loss": 0.0575,
"step": 8790
},
{
"epoch": 2.23,
"learning_rate": 1.434192037470726e-05,
"loss": 0.0692,
"step": 8800
},
{
"epoch": 2.23,
"learning_rate": 1.4295081967213114e-05,
"loss": 0.0319,
"step": 8810
},
{
"epoch": 2.23,
"learning_rate": 1.4248243559718971e-05,
"loss": 0.0888,
"step": 8820
},
{
"epoch": 2.23,
"learning_rate": 1.4201405152224826e-05,
"loss": 0.0367,
"step": 8830
},
{
"epoch": 2.24,
"learning_rate": 1.415456674473068e-05,
"loss": 0.0202,
"step": 8840
},
{
"epoch": 2.24,
"learning_rate": 1.4107728337236534e-05,
"loss": 0.0336,
"step": 8850
},
{
"epoch": 2.24,
"learning_rate": 1.406088992974239e-05,
"loss": 0.0254,
"step": 8860
},
{
"epoch": 2.24,
"learning_rate": 1.4014051522248245e-05,
"loss": 0.013,
"step": 8870
},
{
"epoch": 2.25,
"learning_rate": 1.3967213114754099e-05,
"loss": 0.0082,
"step": 8880
},
{
"epoch": 2.25,
"learning_rate": 1.3920374707259953e-05,
"loss": 0.0397,
"step": 8890
},
{
"epoch": 2.25,
"learning_rate": 1.3873536299765807e-05,
"loss": 0.0605,
"step": 8900
},
{
"epoch": 2.25,
"learning_rate": 1.3826697892271665e-05,
"loss": 0.0385,
"step": 8910
},
{
"epoch": 2.26,
"learning_rate": 1.377985948477752e-05,
"loss": 0.0157,
"step": 8920
},
{
"epoch": 2.26,
"learning_rate": 1.3733021077283372e-05,
"loss": 0.0383,
"step": 8930
},
{
"epoch": 2.26,
"learning_rate": 1.3686182669789226e-05,
"loss": 0.0098,
"step": 8940
},
{
"epoch": 2.26,
"learning_rate": 1.3639344262295084e-05,
"loss": 0.0475,
"step": 8950
},
{
"epoch": 2.27,
"learning_rate": 1.3592505854800938e-05,
"loss": 0.0287,
"step": 8960
},
{
"epoch": 2.27,
"learning_rate": 1.3545667447306793e-05,
"loss": 0.011,
"step": 8970
},
{
"epoch": 2.27,
"learning_rate": 1.3498829039812647e-05,
"loss": 0.0214,
"step": 8980
},
{
"epoch": 2.27,
"learning_rate": 1.3451990632318501e-05,
"loss": 0.0227,
"step": 8990
},
{
"epoch": 2.28,
"learning_rate": 1.3405152224824357e-05,
"loss": 0.0936,
"step": 9000
},
{
"epoch": 2.28,
"learning_rate": 1.3358313817330212e-05,
"loss": 0.1568,
"step": 9010
},
{
"epoch": 2.28,
"learning_rate": 1.3311475409836066e-05,
"loss": 0.0443,
"step": 9020
},
{
"epoch": 2.28,
"learning_rate": 1.326463700234192e-05,
"loss": 0.0064,
"step": 9030
},
{
"epoch": 2.29,
"learning_rate": 1.3217798594847775e-05,
"loss": 0.015,
"step": 9040
},
{
"epoch": 2.29,
"learning_rate": 1.317096018735363e-05,
"loss": 0.0461,
"step": 9050
},
{
"epoch": 2.29,
"learning_rate": 1.3124121779859485e-05,
"loss": 0.1275,
"step": 9060
},
{
"epoch": 2.29,
"learning_rate": 1.307728337236534e-05,
"loss": 0.0097,
"step": 9070
},
{
"epoch": 2.3,
"learning_rate": 1.3030444964871194e-05,
"loss": 0.0088,
"step": 9080
},
{
"epoch": 2.3,
"learning_rate": 1.2983606557377051e-05,
"loss": 0.0189,
"step": 9090
},
{
"epoch": 2.3,
"learning_rate": 1.2936768149882906e-05,
"loss": 0.058,
"step": 9100
},
{
"epoch": 2.3,
"learning_rate": 1.288992974238876e-05,
"loss": 0.0181,
"step": 9110
},
{
"epoch": 2.31,
"learning_rate": 1.2843091334894614e-05,
"loss": 0.0465,
"step": 9120
},
{
"epoch": 2.31,
"learning_rate": 1.2796252927400469e-05,
"loss": 0.0332,
"step": 9130
},
{
"epoch": 2.31,
"learning_rate": 1.2749414519906325e-05,
"loss": 0.0633,
"step": 9140
},
{
"epoch": 2.31,
"learning_rate": 1.2702576112412179e-05,
"loss": 0.0434,
"step": 9150
},
{
"epoch": 2.32,
"learning_rate": 1.2655737704918033e-05,
"loss": 0.0323,
"step": 9160
},
{
"epoch": 2.32,
"learning_rate": 1.2608899297423887e-05,
"loss": 0.0032,
"step": 9170
},
{
"epoch": 2.32,
"learning_rate": 1.2562060889929743e-05,
"loss": 0.04,
"step": 9180
},
{
"epoch": 2.32,
"learning_rate": 1.2515222482435598e-05,
"loss": 0.0267,
"step": 9190
},
{
"epoch": 2.33,
"learning_rate": 1.2468384074941452e-05,
"loss": 0.0054,
"step": 9200
},
{
"epoch": 2.33,
"learning_rate": 1.2421545667447306e-05,
"loss": 0.0145,
"step": 9210
},
{
"epoch": 2.33,
"learning_rate": 1.2374707259953162e-05,
"loss": 0.0102,
"step": 9220
},
{
"epoch": 2.33,
"learning_rate": 1.2327868852459017e-05,
"loss": 0.0651,
"step": 9230
},
{
"epoch": 2.34,
"learning_rate": 1.2281030444964873e-05,
"loss": 0.0324,
"step": 9240
},
{
"epoch": 2.34,
"learning_rate": 1.2234192037470727e-05,
"loss": 0.0514,
"step": 9250
},
{
"epoch": 2.34,
"learning_rate": 1.2187353629976581e-05,
"loss": 0.0647,
"step": 9260
},
{
"epoch": 2.34,
"learning_rate": 1.2140515222482436e-05,
"loss": 0.0595,
"step": 9270
},
{
"epoch": 2.35,
"learning_rate": 1.209367681498829e-05,
"loss": 0.006,
"step": 9280
},
{
"epoch": 2.35,
"learning_rate": 1.2046838407494146e-05,
"loss": 0.0465,
"step": 9290
},
{
"epoch": 2.35,
"learning_rate": 1.2e-05,
"loss": 0.0372,
"step": 9300
},
{
"epoch": 2.35,
"learning_rate": 1.1953161592505856e-05,
"loss": 0.0681,
"step": 9310
},
{
"epoch": 2.36,
"learning_rate": 1.190632318501171e-05,
"loss": 0.0457,
"step": 9320
},
{
"epoch": 2.36,
"learning_rate": 1.1859484777517565e-05,
"loss": 0.0018,
"step": 9330
},
{
"epoch": 2.36,
"learning_rate": 1.181264637002342e-05,
"loss": 0.0007,
"step": 9340
},
{
"epoch": 2.36,
"learning_rate": 1.1765807962529275e-05,
"loss": 0.0298,
"step": 9350
},
{
"epoch": 2.37,
"learning_rate": 1.171896955503513e-05,
"loss": 0.0439,
"step": 9360
},
{
"epoch": 2.37,
"learning_rate": 1.1672131147540984e-05,
"loss": 0.0619,
"step": 9370
},
{
"epoch": 2.37,
"learning_rate": 1.162529274004684e-05,
"loss": 0.0008,
"step": 9380
},
{
"epoch": 2.37,
"learning_rate": 1.1578454332552694e-05,
"loss": 0.0005,
"step": 9390
},
{
"epoch": 2.38,
"learning_rate": 1.1531615925058549e-05,
"loss": 0.1172,
"step": 9400
},
{
"epoch": 2.38,
"learning_rate": 1.1484777517564403e-05,
"loss": 0.1706,
"step": 9410
},
{
"epoch": 2.38,
"learning_rate": 1.1437939110070259e-05,
"loss": 0.0328,
"step": 9420
},
{
"epoch": 2.38,
"learning_rate": 1.1391100702576113e-05,
"loss": 0.013,
"step": 9430
},
{
"epoch": 2.39,
"learning_rate": 1.1344262295081967e-05,
"loss": 0.0388,
"step": 9440
},
{
"epoch": 2.39,
"learning_rate": 1.1297423887587823e-05,
"loss": 0.01,
"step": 9450
},
{
"epoch": 2.39,
"learning_rate": 1.1250585480093676e-05,
"loss": 0.0689,
"step": 9460
},
{
"epoch": 2.4,
"learning_rate": 1.1203747072599532e-05,
"loss": 0.1089,
"step": 9470
},
{
"epoch": 2.4,
"learning_rate": 1.1156908665105386e-05,
"loss": 0.0105,
"step": 9480
},
{
"epoch": 2.4,
"learning_rate": 1.1110070257611242e-05,
"loss": 0.1637,
"step": 9490
},
{
"epoch": 2.4,
"learning_rate": 1.1063231850117097e-05,
"loss": 0.0431,
"step": 9500
},
{
"epoch": 2.41,
"learning_rate": 1.1016393442622953e-05,
"loss": 0.0013,
"step": 9510
},
{
"epoch": 2.41,
"learning_rate": 1.0969555035128807e-05,
"loss": 0.0798,
"step": 9520
},
{
"epoch": 2.41,
"learning_rate": 1.092271662763466e-05,
"loss": 0.0065,
"step": 9530
},
{
"epoch": 2.41,
"learning_rate": 1.0875878220140516e-05,
"loss": 0.0609,
"step": 9540
},
{
"epoch": 2.42,
"learning_rate": 1.082903981264637e-05,
"loss": 0.0639,
"step": 9550
},
{
"epoch": 2.42,
"learning_rate": 1.0782201405152226e-05,
"loss": 0.0288,
"step": 9560
},
{
"epoch": 2.42,
"learning_rate": 1.073536299765808e-05,
"loss": 0.0262,
"step": 9570
},
{
"epoch": 2.42,
"learning_rate": 1.0688524590163936e-05,
"loss": 0.0652,
"step": 9580
},
{
"epoch": 2.43,
"learning_rate": 1.0641686182669789e-05,
"loss": 0.0155,
"step": 9590
},
{
"epoch": 2.43,
"learning_rate": 1.0594847775175643e-05,
"loss": 0.0436,
"step": 9600
},
{
"epoch": 2.43,
"learning_rate": 1.05480093676815e-05,
"loss": 0.1022,
"step": 9610
},
{
"epoch": 2.43,
"learning_rate": 1.0501170960187354e-05,
"loss": 0.0696,
"step": 9620
},
{
"epoch": 2.44,
"learning_rate": 1.045433255269321e-05,
"loss": 0.019,
"step": 9630
},
{
"epoch": 2.44,
"learning_rate": 1.0407494145199064e-05,
"loss": 0.0095,
"step": 9640
},
{
"epoch": 2.44,
"learning_rate": 1.0360655737704918e-05,
"loss": 0.0263,
"step": 9650
},
{
"epoch": 2.44,
"learning_rate": 1.0313817330210773e-05,
"loss": 0.0368,
"step": 9660
},
{
"epoch": 2.45,
"learning_rate": 1.0266978922716629e-05,
"loss": 0.007,
"step": 9670
},
{
"epoch": 2.45,
"learning_rate": 1.0220140515222483e-05,
"loss": 0.0081,
"step": 9680
},
{
"epoch": 2.45,
"learning_rate": 1.0173302107728337e-05,
"loss": 0.0245,
"step": 9690
},
{
"epoch": 2.45,
"learning_rate": 1.0126463700234193e-05,
"loss": 0.0421,
"step": 9700
},
{
"epoch": 2.46,
"learning_rate": 1.0079625292740047e-05,
"loss": 0.027,
"step": 9710
},
{
"epoch": 2.46,
"learning_rate": 1.0032786885245902e-05,
"loss": 0.0354,
"step": 9720
},
{
"epoch": 2.46,
"learning_rate": 9.985948477751756e-06,
"loss": 0.0451,
"step": 9730
},
{
"epoch": 2.46,
"learning_rate": 9.939110070257612e-06,
"loss": 0.0984,
"step": 9740
},
{
"epoch": 2.47,
"learning_rate": 9.892271662763466e-06,
"loss": 0.0468,
"step": 9750
},
{
"epoch": 2.47,
"learning_rate": 9.84543325526932e-06,
"loss": 0.0505,
"step": 9760
},
{
"epoch": 2.47,
"learning_rate": 9.798594847775177e-06,
"loss": 0.0616,
"step": 9770
},
{
"epoch": 2.47,
"learning_rate": 9.751756440281031e-06,
"loss": 0.0038,
"step": 9780
},
{
"epoch": 2.48,
"learning_rate": 9.704918032786885e-06,
"loss": 0.0147,
"step": 9790
},
{
"epoch": 2.48,
"learning_rate": 9.65807962529274e-06,
"loss": 0.0159,
"step": 9800
},
{
"epoch": 2.48,
"learning_rate": 9.611241217798596e-06,
"loss": 0.1126,
"step": 9810
},
{
"epoch": 2.48,
"learning_rate": 9.56440281030445e-06,
"loss": 0.1172,
"step": 9820
},
{
"epoch": 2.49,
"learning_rate": 9.517564402810306e-06,
"loss": 0.0465,
"step": 9830
},
{
"epoch": 2.49,
"learning_rate": 9.47072599531616e-06,
"loss": 0.0116,
"step": 9840
},
{
"epoch": 2.49,
"learning_rate": 9.423887587822015e-06,
"loss": 0.0006,
"step": 9850
},
{
"epoch": 2.49,
"learning_rate": 9.377049180327869e-06,
"loss": 0.0035,
"step": 9860
},
{
"epoch": 2.5,
"learning_rate": 9.330210772833723e-06,
"loss": 0.0115,
"step": 9870
},
{
"epoch": 2.5,
"learning_rate": 9.28337236533958e-06,
"loss": 0.0331,
"step": 9880
},
{
"epoch": 2.5,
"learning_rate": 9.236533957845434e-06,
"loss": 0.0444,
"step": 9890
},
{
"epoch": 2.5,
"learning_rate": 9.18969555035129e-06,
"loss": 0.0245,
"step": 9900
},
{
"epoch": 2.51,
"learning_rate": 9.142857142857144e-06,
"loss": 0.0719,
"step": 9910
},
{
"epoch": 2.51,
"learning_rate": 9.096018735362998e-06,
"loss": 0.1355,
"step": 9920
},
{
"epoch": 2.51,
"learning_rate": 9.049180327868853e-06,
"loss": 0.0134,
"step": 9930
},
{
"epoch": 2.51,
"learning_rate": 9.002341920374707e-06,
"loss": 0.0017,
"step": 9940
},
{
"epoch": 2.52,
"learning_rate": 8.955503512880563e-06,
"loss": 0.0305,
"step": 9950
},
{
"epoch": 2.52,
"learning_rate": 8.908665105386417e-06,
"loss": 0.0054,
"step": 9960
},
{
"epoch": 2.52,
"learning_rate": 8.861826697892273e-06,
"loss": 0.0239,
"step": 9970
},
{
"epoch": 2.52,
"learning_rate": 8.814988290398127e-06,
"loss": 0.0231,
"step": 9980
},
{
"epoch": 2.53,
"learning_rate": 8.768149882903982e-06,
"loss": 0.0102,
"step": 9990
},
{
"epoch": 2.53,
"learning_rate": 8.721311475409836e-06,
"loss": 0.0162,
"step": 10000
},
{
"epoch": 2.53,
"learning_rate": 8.67447306791569e-06,
"loss": 0.0001,
"step": 10010
},
{
"epoch": 2.53,
"learning_rate": 8.627634660421546e-06,
"loss": 0.0112,
"step": 10020
},
{
"epoch": 2.54,
"learning_rate": 8.5807962529274e-06,
"loss": 0.0092,
"step": 10030
},
{
"epoch": 2.54,
"learning_rate": 8.533957845433257e-06,
"loss": 0.0627,
"step": 10040
},
{
"epoch": 2.54,
"learning_rate": 8.487119437939111e-06,
"loss": 0.0211,
"step": 10050
},
{
"epoch": 2.54,
"learning_rate": 8.440281030444965e-06,
"loss": 0.0671,
"step": 10060
},
{
"epoch": 2.55,
"learning_rate": 8.39344262295082e-06,
"loss": 0.1261,
"step": 10070
},
{
"epoch": 2.55,
"learning_rate": 8.346604215456674e-06,
"loss": 0.0993,
"step": 10080
},
{
"epoch": 2.55,
"learning_rate": 8.29976580796253e-06,
"loss": 0.1188,
"step": 10090
},
{
"epoch": 2.55,
"learning_rate": 8.252927400468384e-06,
"loss": 0.0096,
"step": 10100
},
{
"epoch": 2.56,
"learning_rate": 8.20608899297424e-06,
"loss": 0.0003,
"step": 10110
},
{
"epoch": 2.56,
"learning_rate": 8.159250585480095e-06,
"loss": 0.0516,
"step": 10120
},
{
"epoch": 2.56,
"learning_rate": 8.112412177985949e-06,
"loss": 0.002,
"step": 10130
},
{
"epoch": 2.56,
"learning_rate": 8.065573770491803e-06,
"loss": 0.1107,
"step": 10140
},
{
"epoch": 2.57,
"learning_rate": 8.01873536299766e-06,
"loss": 0.0615,
"step": 10150
},
{
"epoch": 2.57,
"learning_rate": 7.971896955503514e-06,
"loss": 0.0614,
"step": 10160
},
{
"epoch": 2.57,
"learning_rate": 7.925058548009368e-06,
"loss": 0.0036,
"step": 10170
},
{
"epoch": 2.57,
"learning_rate": 7.878220140515224e-06,
"loss": 0.0193,
"step": 10180
},
{
"epoch": 2.58,
"learning_rate": 7.831381733021077e-06,
"loss": 0.0329,
"step": 10190
},
{
"epoch": 2.58,
"learning_rate": 7.784543325526933e-06,
"loss": 0.0173,
"step": 10200
},
{
"epoch": 2.58,
"learning_rate": 7.737704918032787e-06,
"loss": 0.1301,
"step": 10210
},
{
"epoch": 2.58,
"learning_rate": 7.690866510538643e-06,
"loss": 0.0584,
"step": 10220
},
{
"epoch": 2.59,
"learning_rate": 7.644028103044497e-06,
"loss": 0.0812,
"step": 10230
},
{
"epoch": 2.59,
"learning_rate": 7.597189695550351e-06,
"loss": 0.0004,
"step": 10240
},
{
"epoch": 2.59,
"learning_rate": 7.550351288056207e-06,
"loss": 0.0061,
"step": 10250
},
{
"epoch": 2.59,
"learning_rate": 7.503512880562061e-06,
"loss": 0.007,
"step": 10260
},
{
"epoch": 2.6,
"learning_rate": 7.456674473067916e-06,
"loss": 0.0372,
"step": 10270
},
{
"epoch": 2.6,
"learning_rate": 7.40983606557377e-06,
"loss": 0.0818,
"step": 10280
},
{
"epoch": 2.6,
"learning_rate": 7.3629976580796264e-06,
"loss": 0.0091,
"step": 10290
},
{
"epoch": 2.6,
"learning_rate": 7.316159250585481e-06,
"loss": 0.0091,
"step": 10300
},
{
"epoch": 2.61,
"learning_rate": 7.269320843091336e-06,
"loss": 0.0287,
"step": 10310
},
{
"epoch": 2.61,
"learning_rate": 7.22248243559719e-06,
"loss": 0.0927,
"step": 10320
},
{
"epoch": 2.61,
"learning_rate": 7.1756440281030445e-06,
"loss": 0.0535,
"step": 10330
},
{
"epoch": 2.62,
"learning_rate": 7.1288056206089e-06,
"loss": 0.0701,
"step": 10340
},
{
"epoch": 2.62,
"learning_rate": 7.081967213114754e-06,
"loss": 0.0433,
"step": 10350
},
{
"epoch": 2.62,
"learning_rate": 7.03512880562061e-06,
"loss": 0.0224,
"step": 10360
},
{
"epoch": 2.62,
"learning_rate": 6.9882903981264635e-06,
"loss": 0.0715,
"step": 10370
},
{
"epoch": 2.63,
"learning_rate": 6.9414519906323195e-06,
"loss": 0.0189,
"step": 10380
},
{
"epoch": 2.63,
"learning_rate": 6.894613583138174e-06,
"loss": 0.0409,
"step": 10390
},
{
"epoch": 2.63,
"learning_rate": 6.847775175644028e-06,
"loss": 0.0225,
"step": 10400
},
{
"epoch": 2.63,
"learning_rate": 6.800936768149883e-06,
"loss": 0.0057,
"step": 10410
},
{
"epoch": 2.64,
"learning_rate": 6.754098360655738e-06,
"loss": 0.0297,
"step": 10420
},
{
"epoch": 2.64,
"learning_rate": 6.707259953161594e-06,
"loss": 0.0359,
"step": 10430
},
{
"epoch": 2.64,
"learning_rate": 6.660421545667447e-06,
"loss": 0.0418,
"step": 10440
},
{
"epoch": 2.64,
"learning_rate": 6.613583138173303e-06,
"loss": 0.0143,
"step": 10450
},
{
"epoch": 2.65,
"learning_rate": 6.566744730679157e-06,
"loss": 0.0338,
"step": 10460
},
{
"epoch": 2.65,
"learning_rate": 6.5199063231850125e-06,
"loss": 0.0093,
"step": 10470
},
{
"epoch": 2.65,
"learning_rate": 6.473067915690867e-06,
"loss": 0.0006,
"step": 10480
},
{
"epoch": 2.65,
"learning_rate": 6.426229508196721e-06,
"loss": 0.0915,
"step": 10490
},
{
"epoch": 2.66,
"learning_rate": 6.379391100702576e-06,
"loss": 0.0458,
"step": 10500
},
{
"epoch": 2.66,
"learning_rate": 6.332552693208431e-06,
"loss": 0.0507,
"step": 10510
},
{
"epoch": 2.66,
"learning_rate": 6.285714285714287e-06,
"loss": 0.0058,
"step": 10520
},
{
"epoch": 2.66,
"learning_rate": 6.238875878220141e-06,
"loss": 0.0936,
"step": 10530
},
{
"epoch": 2.67,
"learning_rate": 6.192037470725995e-06,
"loss": 0.0065,
"step": 10540
},
{
"epoch": 2.67,
"learning_rate": 6.1451990632318504e-06,
"loss": 0.0179,
"step": 10550
},
{
"epoch": 2.67,
"learning_rate": 6.098360655737706e-06,
"loss": 0.0421,
"step": 10560
},
{
"epoch": 2.67,
"learning_rate": 6.05152224824356e-06,
"loss": 0.044,
"step": 10570
},
{
"epoch": 2.68,
"learning_rate": 6.004683840749415e-06,
"loss": 0.0435,
"step": 10580
},
{
"epoch": 2.68,
"learning_rate": 5.95784543325527e-06,
"loss": 0.0684,
"step": 10590
},
{
"epoch": 2.68,
"learning_rate": 5.9110070257611245e-06,
"loss": 0.0676,
"step": 10600
},
{
"epoch": 2.68,
"learning_rate": 5.864168618266979e-06,
"loss": 0.0146,
"step": 10610
},
{
"epoch": 2.69,
"learning_rate": 5.817330210772834e-06,
"loss": 0.009,
"step": 10620
},
{
"epoch": 2.69,
"learning_rate": 5.770491803278689e-06,
"loss": 0.051,
"step": 10630
},
{
"epoch": 2.69,
"learning_rate": 5.7236533957845435e-06,
"loss": 0.0493,
"step": 10640
},
{
"epoch": 2.69,
"learning_rate": 5.676814988290399e-06,
"loss": 0.0834,
"step": 10650
},
{
"epoch": 2.7,
"learning_rate": 5.629976580796254e-06,
"loss": 0.046,
"step": 10660
},
{
"epoch": 2.7,
"learning_rate": 5.583138173302108e-06,
"loss": 0.0379,
"step": 10670
},
{
"epoch": 2.7,
"learning_rate": 5.5362997658079624e-06,
"loss": 0.0257,
"step": 10680
},
{
"epoch": 2.7,
"learning_rate": 5.489461358313818e-06,
"loss": 0.0731,
"step": 10690
},
{
"epoch": 2.71,
"learning_rate": 5.442622950819672e-06,
"loss": 0.0497,
"step": 10700
},
{
"epoch": 2.71,
"learning_rate": 5.395784543325527e-06,
"loss": 0.0503,
"step": 10710
},
{
"epoch": 2.71,
"learning_rate": 5.348946135831382e-06,
"loss": 0.0371,
"step": 10720
},
{
"epoch": 2.71,
"learning_rate": 5.302107728337237e-06,
"loss": 0.0248,
"step": 10730
},
{
"epoch": 2.72,
"learning_rate": 5.255269320843092e-06,
"loss": 0.0567,
"step": 10740
},
{
"epoch": 2.72,
"learning_rate": 5.208430913348947e-06,
"loss": 0.0174,
"step": 10750
},
{
"epoch": 2.72,
"learning_rate": 5.161592505854801e-06,
"loss": 0.0365,
"step": 10760
},
{
"epoch": 2.72,
"learning_rate": 5.1147540983606555e-06,
"loss": 0.0398,
"step": 10770
},
{
"epoch": 2.73,
"learning_rate": 5.067915690866511e-06,
"loss": 0.0726,
"step": 10780
},
{
"epoch": 2.73,
"learning_rate": 5.021077283372366e-06,
"loss": 0.005,
"step": 10790
},
{
"epoch": 2.73,
"learning_rate": 4.97423887587822e-06,
"loss": 0.0052,
"step": 10800
},
{
"epoch": 2.73,
"learning_rate": 4.927400468384075e-06,
"loss": 0.0058,
"step": 10810
},
{
"epoch": 2.74,
"learning_rate": 4.8805620608899304e-06,
"loss": 0.0418,
"step": 10820
},
{
"epoch": 2.74,
"learning_rate": 4.833723653395785e-06,
"loss": 0.0034,
"step": 10830
},
{
"epoch": 2.74,
"learning_rate": 4.786885245901639e-06,
"loss": 0.0836,
"step": 10840
},
{
"epoch": 2.74,
"learning_rate": 4.740046838407494e-06,
"loss": 0.0164,
"step": 10850
},
{
"epoch": 2.75,
"learning_rate": 4.693208430913349e-06,
"loss": 0.012,
"step": 10860
},
{
"epoch": 2.75,
"learning_rate": 4.646370023419204e-06,
"loss": 0.0097,
"step": 10870
},
{
"epoch": 2.75,
"learning_rate": 4.599531615925059e-06,
"loss": 0.0905,
"step": 10880
},
{
"epoch": 2.75,
"learning_rate": 4.552693208430914e-06,
"loss": 0.0001,
"step": 10890
},
{
"epoch": 2.76,
"learning_rate": 4.505854800936768e-06,
"loss": 0.0884,
"step": 10900
},
{
"epoch": 2.76,
"learning_rate": 4.4590163934426235e-06,
"loss": 0.0092,
"step": 10910
},
{
"epoch": 2.76,
"learning_rate": 4.412177985948478e-06,
"loss": 0.0029,
"step": 10920
},
{
"epoch": 2.76,
"learning_rate": 4.365339578454333e-06,
"loss": 0.0506,
"step": 10930
},
{
"epoch": 2.77,
"learning_rate": 4.318501170960187e-06,
"loss": 0.0418,
"step": 10940
},
{
"epoch": 2.77,
"learning_rate": 4.2716627634660424e-06,
"loss": 0.0133,
"step": 10950
},
{
"epoch": 2.77,
"learning_rate": 4.224824355971898e-06,
"loss": 0.0091,
"step": 10960
},
{
"epoch": 2.77,
"learning_rate": 4.177985948477752e-06,
"loss": 0.0128,
"step": 10970
},
{
"epoch": 2.78,
"learning_rate": 4.131147540983607e-06,
"loss": 0.0026,
"step": 10980
},
{
"epoch": 2.78,
"learning_rate": 4.084309133489462e-06,
"loss": 0.0391,
"step": 10990
},
{
"epoch": 2.78,
"learning_rate": 4.037470725995316e-06,
"loss": 0.0083,
"step": 11000
},
{
"epoch": 2.78,
"learning_rate": 3.990632318501171e-06,
"loss": 0.0242,
"step": 11010
},
{
"epoch": 2.79,
"learning_rate": 3.943793911007026e-06,
"loss": 0.0916,
"step": 11020
},
{
"epoch": 2.79,
"learning_rate": 3.89695550351288e-06,
"loss": 0.0312,
"step": 11030
},
{
"epoch": 2.79,
"learning_rate": 3.8501170960187355e-06,
"loss": 0.0755,
"step": 11040
},
{
"epoch": 2.79,
"learning_rate": 3.8032786885245906e-06,
"loss": 0.0622,
"step": 11050
},
{
"epoch": 2.8,
"learning_rate": 3.7564402810304454e-06,
"loss": 0.0095,
"step": 11060
},
{
"epoch": 2.8,
"learning_rate": 3.7096018735363e-06,
"loss": 0.0317,
"step": 11070
},
{
"epoch": 2.8,
"learning_rate": 3.6627634660421544e-06,
"loss": 0.0006,
"step": 11080
},
{
"epoch": 2.8,
"learning_rate": 3.615925058548009e-06,
"loss": 0.0016,
"step": 11090
},
{
"epoch": 2.81,
"learning_rate": 3.5690866510538643e-06,
"loss": 0.0346,
"step": 11100
},
{
"epoch": 2.81,
"learning_rate": 3.522248243559719e-06,
"loss": 0.0314,
"step": 11110
},
{
"epoch": 2.81,
"learning_rate": 3.4754098360655742e-06,
"loss": 0.0122,
"step": 11120
},
{
"epoch": 2.81,
"learning_rate": 3.428571428571429e-06,
"loss": 0.0074,
"step": 11130
},
{
"epoch": 2.82,
"learning_rate": 3.3817330210772837e-06,
"loss": 0.0229,
"step": 11140
},
{
"epoch": 2.82,
"learning_rate": 3.334894613583139e-06,
"loss": 0.0121,
"step": 11150
},
{
"epoch": 2.82,
"learning_rate": 3.2880562060889927e-06,
"loss": 0.0301,
"step": 11160
},
{
"epoch": 2.82,
"learning_rate": 3.241217798594848e-06,
"loss": 0.0583,
"step": 11170
},
{
"epoch": 2.83,
"learning_rate": 3.1943793911007026e-06,
"loss": 0.0208,
"step": 11180
},
{
"epoch": 2.83,
"learning_rate": 3.1475409836065574e-06,
"loss": 0.0136,
"step": 11190
},
{
"epoch": 2.83,
"learning_rate": 3.1007025761124125e-06,
"loss": 0.0006,
"step": 11200
},
{
"epoch": 2.84,
"learning_rate": 3.0538641686182673e-06,
"loss": 0.0463,
"step": 11210
},
{
"epoch": 2.84,
"learning_rate": 3.007025761124122e-06,
"loss": 0.0883,
"step": 11220
},
{
"epoch": 2.84,
"learning_rate": 2.9601873536299768e-06,
"loss": 0.058,
"step": 11230
},
{
"epoch": 2.84,
"learning_rate": 2.9133489461358315e-06,
"loss": 0.055,
"step": 11240
},
{
"epoch": 2.85,
"learning_rate": 2.8665105386416866e-06,
"loss": 0.0056,
"step": 11250
},
{
"epoch": 2.85,
"learning_rate": 2.819672131147541e-06,
"loss": 0.0012,
"step": 11260
},
{
"epoch": 2.85,
"learning_rate": 2.772833723653396e-06,
"loss": 0.0249,
"step": 11270
},
{
"epoch": 2.85,
"learning_rate": 2.725995316159251e-06,
"loss": 0.0435,
"step": 11280
},
{
"epoch": 2.86,
"learning_rate": 2.6791569086651056e-06,
"loss": 0.0086,
"step": 11290
},
{
"epoch": 2.86,
"learning_rate": 2.6323185011709603e-06,
"loss": 0.0265,
"step": 11300
},
{
"epoch": 2.86,
"learning_rate": 2.585480093676815e-06,
"loss": 0.0211,
"step": 11310
},
{
"epoch": 2.86,
"learning_rate": 2.53864168618267e-06,
"loss": 0.0356,
"step": 11320
},
{
"epoch": 2.87,
"learning_rate": 2.491803278688525e-06,
"loss": 0.0148,
"step": 11330
},
{
"epoch": 2.87,
"learning_rate": 2.4449648711943793e-06,
"loss": 0.0168,
"step": 11340
},
{
"epoch": 2.87,
"learning_rate": 2.3981264637002344e-06,
"loss": 0.0708,
"step": 11350
},
{
"epoch": 2.87,
"learning_rate": 2.351288056206089e-06,
"loss": 0.0801,
"step": 11360
},
{
"epoch": 2.88,
"learning_rate": 2.304449648711944e-06,
"loss": 0.0588,
"step": 11370
},
{
"epoch": 2.88,
"learning_rate": 2.2576112412177986e-06,
"loss": 0.0426,
"step": 11380
},
{
"epoch": 2.88,
"learning_rate": 2.2107728337236534e-06,
"loss": 0.0024,
"step": 11390
},
{
"epoch": 2.88,
"learning_rate": 2.1639344262295085e-06,
"loss": 0.0657,
"step": 11400
},
{
"epoch": 2.89,
"learning_rate": 2.1170960187353633e-06,
"loss": 0.0634,
"step": 11410
},
{
"epoch": 2.89,
"learning_rate": 2.070257611241218e-06,
"loss": 0.0675,
"step": 11420
},
{
"epoch": 2.89,
"learning_rate": 2.0234192037470728e-06,
"loss": 0.0348,
"step": 11430
},
{
"epoch": 2.89,
"learning_rate": 1.9765807962529275e-06,
"loss": 0.056,
"step": 11440
},
{
"epoch": 2.9,
"learning_rate": 1.9297423887587826e-06,
"loss": 0.0009,
"step": 11450
},
{
"epoch": 2.9,
"learning_rate": 1.882903981264637e-06,
"loss": 0.032,
"step": 11460
},
{
"epoch": 2.9,
"learning_rate": 1.836065573770492e-06,
"loss": 0.0774,
"step": 11470
},
{
"epoch": 2.9,
"learning_rate": 1.7892271662763469e-06,
"loss": 0.0356,
"step": 11480
},
{
"epoch": 2.91,
"learning_rate": 1.7423887587822016e-06,
"loss": 0.0704,
"step": 11490
},
{
"epoch": 2.91,
"learning_rate": 1.6955503512880561e-06,
"loss": 0.0831,
"step": 11500
},
{
"epoch": 2.91,
"learning_rate": 1.648711943793911e-06,
"loss": 0.0167,
"step": 11510
},
{
"epoch": 2.91,
"learning_rate": 1.601873536299766e-06,
"loss": 0.001,
"step": 11520
},
{
"epoch": 2.92,
"learning_rate": 1.5550351288056208e-06,
"loss": 0.0526,
"step": 11530
},
{
"epoch": 2.92,
"learning_rate": 1.5081967213114755e-06,
"loss": 0.0186,
"step": 11540
},
{
"epoch": 2.92,
"learning_rate": 1.4613583138173302e-06,
"loss": 0.0101,
"step": 11550
},
{
"epoch": 2.92,
"learning_rate": 1.4145199063231852e-06,
"loss": 0.0381,
"step": 11560
},
{
"epoch": 2.93,
"learning_rate": 1.36768149882904e-06,
"loss": 0.0469,
"step": 11570
},
{
"epoch": 2.93,
"learning_rate": 1.3208430913348949e-06,
"loss": 0.0172,
"step": 11580
},
{
"epoch": 2.93,
"learning_rate": 1.2740046838407494e-06,
"loss": 0.0461,
"step": 11590
},
{
"epoch": 2.93,
"learning_rate": 1.2271662763466043e-06,
"loss": 0.1021,
"step": 11600
},
{
"epoch": 2.94,
"learning_rate": 1.180327868852459e-06,
"loss": 0.0042,
"step": 11610
},
{
"epoch": 2.94,
"learning_rate": 1.133489461358314e-06,
"loss": 0.1074,
"step": 11620
},
{
"epoch": 2.94,
"learning_rate": 1.0866510538641688e-06,
"loss": 0.0034,
"step": 11630
},
{
"epoch": 2.94,
"learning_rate": 1.0398126463700235e-06,
"loss": 0.0093,
"step": 11640
},
{
"epoch": 2.95,
"learning_rate": 9.929742388758782e-07,
"loss": 0.109,
"step": 11650
},
{
"epoch": 2.95,
"learning_rate": 9.461358313817332e-07,
"loss": 0.0425,
"step": 11660
},
{
"epoch": 2.95,
"learning_rate": 8.992974238875879e-07,
"loss": 0.0044,
"step": 11670
},
{
"epoch": 2.95,
"learning_rate": 8.524590163934428e-07,
"loss": 0.0115,
"step": 11680
},
{
"epoch": 2.96,
"learning_rate": 8.056206088992975e-07,
"loss": 0.0088,
"step": 11690
},
{
"epoch": 2.96,
"learning_rate": 7.587822014051522e-07,
"loss": 0.0217,
"step": 11700
},
{
"epoch": 2.96,
"learning_rate": 7.119437939110071e-07,
"loss": 0.0134,
"step": 11710
},
{
"epoch": 2.96,
"learning_rate": 6.651053864168618e-07,
"loss": 0.0092,
"step": 11720
},
{
"epoch": 2.97,
"learning_rate": 6.182669789227167e-07,
"loss": 0.0343,
"step": 11730
},
{
"epoch": 2.97,
"learning_rate": 5.714285714285715e-07,
"loss": 0.0161,
"step": 11740
},
{
"epoch": 2.97,
"learning_rate": 5.245901639344262e-07,
"loss": 0.0648,
"step": 11750
},
{
"epoch": 2.97,
"learning_rate": 4.777517564402811e-07,
"loss": 0.0211,
"step": 11760
},
{
"epoch": 2.98,
"learning_rate": 4.3091334894613586e-07,
"loss": 0.065,
"step": 11770
},
{
"epoch": 2.98,
"learning_rate": 3.8407494145199065e-07,
"loss": 0.0272,
"step": 11780
},
{
"epoch": 2.98,
"learning_rate": 3.3723653395784544e-07,
"loss": 0.0004,
"step": 11790
},
{
"epoch": 2.98,
"learning_rate": 2.9039812646370023e-07,
"loss": 0.045,
"step": 11800
},
{
"epoch": 2.99,
"learning_rate": 2.43559718969555e-07,
"loss": 0.0074,
"step": 11810
},
{
"epoch": 2.99,
"learning_rate": 1.9672131147540984e-07,
"loss": 0.0054,
"step": 11820
},
{
"epoch": 2.99,
"learning_rate": 1.4988290398126465e-07,
"loss": 0.0024,
"step": 11830
},
{
"epoch": 2.99,
"learning_rate": 1.0304449648711944e-07,
"loss": 0.0012,
"step": 11840
},
{
"epoch": 3.0,
"learning_rate": 5.6206088992974245e-08,
"loss": 0.0285,
"step": 11850
},
{
"epoch": 3.0,
"learning_rate": 9.36768149882904e-09,
"loss": 0.018,
"step": 11860
},
{
"epoch": 3.0,
"eval_accuracy": 0.9982930298719772,
"eval_loss": 0.004252986516803503,
"eval_runtime": 97.4967,
"eval_samples_per_second": 72.105,
"eval_steps_per_second": 18.031,
"step": 11862
},
{
"epoch": 3.0,
"step": 11862,
"total_flos": 4.722263050454434e+18,
"train_loss": 0.19978058402062693,
"train_runtime": 6280.0341,
"train_samples_per_second": 30.222,
"train_steps_per_second": 1.889
}
],
"max_steps": 11862,
"num_train_epochs": 3,
"total_flos": 4.722263050454434e+18,
"trial_name": null,
"trial_params": null
}